def __init__(self, config, embedding, vocab, loss_criterion):
    """Bundle an encoder/Luong-attention-decoder pair with its config and loss.

    Args:
        config: hyper-parameter object (hidden_size, layer counts, dropout, attn_model).
        embedding: embedding module shared by encoder and decoder.
        vocab: vocabulary providing ``num_words`` and ``bos_id``.
        loss_criterion: loss callable used during training.
    """
    super().__init__()
    # Both sub-networks share the same embedding table.
    self.encoder = EncoderRNN(config.hidden_size, embedding,
                              config.encoder_n_layers, config.dropout)
    self.decoder = LuongAttnDecoderRNN(config.attn_model, embedding,
                                       config.hidden_size, vocab.num_words,
                                       config.decoder_n_layers, config.dropout)
    self.config = config
    self.bos_id = vocab.bos_id
    self.loss_criterion = loss_criterion
def __init__(self, hparams):
    """Assemble the Tacotron2 variant: symbol embedding, content/style
    reference encoders, text encoder, decoder and post-net.

    Args:
        hparams: hyper-parameter namespace (mel channel count, embedding
            dims, switch probabilities, fp16 flag, etc.).
    """
    super(Tacotron2, self).__init__()
    # Scalar hyper-parameters kept on the module for later use.
    self.style_switch_prob = hparams.style_switch_prob
    self.contents_switch_prob = hparams.contents_switch_prob
    self.mask_padding = hparams.mask_padding
    self.fp16_run = hparams.fp16_run
    self.n_mel_channels = hparams.n_mel_channels
    self.n_frames_per_step = hparams.n_frames_per_step
    # Symbol embedding, initialised uniformly with Xavier-style bounds.
    emb_dim = hparams.symbols_embedding_dim
    self.embedding = nn.Embedding(hparams.n_symbols, emb_dim)
    std = sqrt(2.0 / (hparams.n_symbols + emb_dim))
    bound = sqrt(3.0) * std  # uniform bounds for std
    self.embedding.weight.data.uniform_(-bound, bound)
    # Reference encoders over mel spectrograms: one for contents, one for style.
    self.contents_enc = EncoderRNN(hparams.n_mel_channels, emb_dim // 2, 2,
                                   emb_type='raw')
    self.style_enc = EncoderRNN(hparams.n_mel_channels,
                                hparams.style_embedding_dim, 2)
    self.linear_enc = nn.Linear(emb_dim, emb_dim, bias=False)
    # Core Tacotron2 stack.
    self.encoder = Encoder(hparams)
    self.decoder = Decoder(hparams)
    self.postnet = Postnet(hparams)
def run_evaluation1(corpus_dir, save_dir, datafile, config_file):
    """Load the last checkpoint for the configured model and start an
    interactive greedy-decoding evaluation loop.

    Note: ``corpus_dir``, ``save_dir`` and ``datafile`` are kept for interface
    compatibility; the effective paths all come from the JSON config.

    Args:
        corpus_dir: unused (paths are taken from the config).
        save_dir: unused; immediately replaced by ``config.save_dir``.
        datafile: unused (paths are taken from the config).
        config_file: path to the JSON training/eval configuration.
    """
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir  # config wins over the save_dir argument
    # Checkpoint layout mirrors the one produced by training:
    # <save_dir>/<model>/<corpus>/<enc_layers>-<dec_layers>_<hidden>/last_checkpoint.tar
    load_filename = os.path.join(
        save_dir, config.model_name, config.corpus_name,
        '{}-{}_{}'.format(config.encoder_n_layers, config.decoder_n_layers,
                          config.hidden_size),
        "last_checkpoint.tar")
    # map_location keeps CPU-only machines working with GPU-trained checkpoints.
    checkpoint = torch.load(load_filename,
                            map_location=lambda storage, loc: storage)
    encoder_sd = checkpoint["en"]
    decoder_sd = checkpoint["de"]
    # Optimizer states ("en_opt"/"de_opt") are intentionally not loaded:
    # this is an eval-only path and they were never used here.
    embedding_sd = checkpoint["embedding"]
    vocab = BPEVocab.from_files(config.bpe_vocab_path, config.bpe_codes_path)
    print("Building encoder and decoder ...")
    # Initialize word embeddings and restore the trained weights.
    embedding = nn.Embedding(len(vocab), config.hidden_size)
    embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models.
    encoder = EncoderRNN(config.hidden_size, embedding,
                         config.encoder_n_layers, config.dropout)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding,
                                  config.hidden_size, len(vocab),
                                  config.decoder_n_layers, config.dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Set dropout layers to eval mode.
    encoder.eval()
    decoder.eval()
    # Initialize search module and hand off to the interactive loop.
    searcher = GreedySearchDecoder(encoder, decoder)
    evaluate_input1(encoder, decoder, searcher, vocab)
# NOTE(review): this line is a collapsed paste containing three separate pieces:
#   (1) the tail of a trainIters-style loop whose start is NOT in this file
#       (periodic plot-average bookkeeping and the final showPlot call) — it
#       begins mid-expression with unbalanced parentheses, so it cannot be
#       reformatted here without guessing the missing head;
#   (2) showPlot(points): plots the collected losses with a MultipleLocator
#       placing y-axis ticks every 0.2;
#   (3) top-level script code: prepares the eng/fra pairs, builds a 256-unit
#       EncoderRNN / AttnDecoderRNN pair on `device`, trains for 75000
#       iterations (printing every 5000), and saves both state dicts under
#       models/.  The code is left byte-identical; it needs re-indenting
#       (and the missing function head) before it can run.
iter / n_iters * 100, print_loss_avg)) if iter % plot_every == 0: plot_loss_avg = plot_loss_total / plot_every plot_losses.append(plot_loss_avg) plot_loss_total = 0 showPlot(plot_losses) def showPlot(points): plt.figure() fig, ax = plt.subplots() # this locator puts ticks at regular intervals loc = ticker.MultipleLocator(base=0.2) ax.yaxis.set_major_locator(loc) plt.plot(points) input_lang, output_lang, pairs = prepareData('eng', 'fra', True) hidden_size = 256 encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device) attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device) trainIters(encoder1, attn_decoder1, 75000, print_every=5000) #trainIters(encoder1, attn_decoder1, n_iters=100, print_every=10, plot_every=10) torch.save(encoder1.state_dict(), 'models/encoder1.bin') torch.save(attn_decoder1.state_dict(), 'models/attn_decoder1.bin')
def run_training1(config_file, load_filename=""):
    """Train the BPE-vocabulary chatbot model described by *config_file*.

    Builds the dataset/data-loader, a shared embedding, the encoder and the
    Luong-attention decoder, optionally restores all of them (plus optimizer
    state) from *load_filename*, and hands everything to ``training_iters1``.

    Args:
        config_file: path to the JSON training configuration.
        load_filename: optional checkpoint path; when empty ("" — falsy),
            training starts from scratch.
    """
    # Read configuration and vocabulary.
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir
    datafile = config.datafile
    vocab = BPEVocab.from_files(config.bpe_vocab_path, config.bpe_codes_path)
    if load_filename:
        # map_location makes a GPU-trained checkpoint loadable on any device
        # (the original unconditionally loaded onto the training device).
        checkpoint = torch.load(load_filename, map_location=config.device)
        encoder_sd = checkpoint["en"]
        decoder_sd = checkpoint["de"]
        encoder_optimizer_sd = checkpoint["en_opt"]
        decoder_optimizer_sd = checkpoint["de_opt"]
        embedding_sd = checkpoint["embedding"]
        # Restore the exact vocabulary state the checkpoint was trained with.
        vocab.__dict__ = checkpoint["voc_dict"]
    dataset = CornellMovieDialogDataset(config, paths=[datafile], vocab=vocab)
    # TODO: try shuffle=False
    # TODO: the batched input tensor to the model is transposed! is there anything wrong?
    data_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True,
                             collate_fn=dataset.collate_func, drop_last=True)
    num_tokens = len(vocab)
    print("Building encoder and decoder ...")
    # Initialize word embeddings (shared by encoder and decoder).
    embedding = nn.Embedding(num_tokens, config.hidden_size).to(config.device)
    if load_filename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models.
    encoder = EncoderRNN(config.hidden_size, embedding, config.encoder_n_layers,
                         config.dropout).to(config.device)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding, config.hidden_size,
                                  num_tokens, config.decoder_n_layers,
                                  config.dropout).to(config.device)
    if load_filename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    print("Models built and ready to go.")
    # Ensure dropout layers are in train mode.
    encoder.train()
    decoder.train()
    # Initialize optimizers; the decoder learning rate is scaled by
    # decoder_learning_ratio, as in the Luong-attention chatbot recipe.
    print("building optimizers")
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    decoder_optimizer = optim.Adam(
        decoder.parameters(),
        lr=config.learning_rate * config.decoder_learning_ratio)
    if load_filename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)
    training_iters1(data_loader, config, vocab, encoder, decoder,
                    encoder_optimizer, decoder_optimizer, embedding,
                    save_dir, load_filename, epoch=1)
def run_training(config_file, load_filename=""):
    """Train the word-level chatbot model described by *config_file*.

    Prepares the corpus into (vocab, pairs), builds a shared embedding, the
    encoder and the Luong-attention decoder, optionally restores everything
    (plus optimizer state) from *load_filename*, and runs ``training_iters``.

    Args:
        config_file: path to the JSON training configuration.
        load_filename: optional checkpoint path; when empty ("" — falsy),
            training starts from scratch.
    """
    # Read configuration and prepare the data.
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir
    datafile = config.datafile
    corpus_dir = config.corpus_dir
    prepare_data = PrepareData(min_count=config.MIN_COUNT,
                               max_length=config.MAX_LENGTH)
    vocab, pairs = prepare_data.load_prepare_data(corpus_dir, datafile, save_dir)
    if load_filename:
        # map_location makes a GPU-trained checkpoint loadable on any device
        # (the original unconditionally loaded onto the training device).
        checkpoint = torch.load(load_filename, map_location=config.device)
        encoder_sd = checkpoint["en"]
        decoder_sd = checkpoint["de"]
        encoder_optimizer_sd = checkpoint["en_opt"]
        decoder_optimizer_sd = checkpoint["de_opt"]
        embedding_sd = checkpoint["embedding"]
        # Restore the exact vocabulary state the checkpoint was trained with.
        vocab.__dict__ = checkpoint["voc_dict"]
    print("Building encoder and decoder ...")
    # Initialize word embeddings (shared by encoder and decoder).
    embedding = nn.Embedding(vocab.num_words, config.hidden_size).to(config.device)
    if load_filename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models.
    encoder = EncoderRNN(config.hidden_size, embedding, config.encoder_n_layers,
                         config.dropout).to(config.device)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding, config.hidden_size,
                                  vocab.num_words, config.decoder_n_layers,
                                  config.dropout).to(config.device)
    if load_filename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    print("Models built and ready to go.")
    # Ensure dropout layers are in train mode.
    encoder.train()
    decoder.train()
    # Initialize optimizers; the decoder learning rate is scaled by
    # decoder_learning_ratio, as in the Luong-attention chatbot recipe.
    print("building optimizers")
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    decoder_optimizer = optim.Adam(
        decoder.parameters(),
        lr=config.learning_rate * config.decoder_learning_ratio)
    if load_filename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)
    # Run training iterations.
    training_iters(config, vocab, pairs, encoder, decoder, encoder_optimizer,
                   decoder_optimizer, embedding, save_dir, load_filename)
def train_iters(path, n_iters, input_texts, target_texts, input_encoder,
                target_encoder, device, max_length, print_every=100,
                plot_every=200, learning_rate=0.0001, momentum=0.99,
                n_epochs=5):
    """Train (or resume) an encoder / attention-decoder pair over parallel texts.

    Existing ``encoder.pt`` / ``decoder.pt`` under *path* are loaded to resume
    training; otherwise fresh models are created.  Progress is printed every
    *print_every* steps, and every *plot_every* steps the loss is recorded and
    models plus optimizer states are checkpointed to *path*.

    Args:
        path: directory prefix for model/optimizer checkpoint files.
        n_iters: number of samples consumed per epoch (the loop runs steps
            0..n_iters inclusive, so the text lists need n_iters + 1 entries).
        input_texts / target_texts: parallel source/target sentences.
        input_encoder / target_encoder: sentence-to-tensor encoders; their
            ``.max`` attribute sizes the model vocabularies.
        device: torch device for newly created models.
        max_length: maximum sequence length passed through to ``train``.
        print_every / plot_every: reporting and checkpoint intervals (steps).
        learning_rate / momentum: Adam lr and first beta.
        n_epochs: number of passes over the data (default 5, the previous
            hard-coded value — kept for backward compatibility).
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every
    plot_loss_total = 0   # reset every plot_every
    latent_dim = 512      # latent dimensionality of the encoding space
    encoder_path = path + "encoder.pt"
    decoder_path = path + "decoder.pt"
    # Resume from a previous run when saved models exist.
    encoder = (torch.load(encoder_path) if Path(encoder_path).exists()
               else EncoderRNN(input_encoder.max, latent_dim).to(device))
    decoder = (torch.load(decoder_path) if Path(decoder_path).exists()
               else SimpleAttnDecoderRNN(latent_dim, target_encoder.max).to(device))
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         betas=(momentum, 0.999), lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         betas=(momentum, 0.999), lr=learning_rate)
    criterion = torch.nn.NLLLoss()
    for epoch in range(0, n_epochs):
        print("epoch: " + str(epoch) + '\n')
        # "step" instead of the original "iter", which shadowed the builtin.
        for step in range(0, n_iters + 1):
            input_tensor = input_encoder.get_encoding_for_sentence_single_tensor(
                input_texts[step])
            target_tensor = target_encoder.get_encoding_for_sentence(
                target_texts[step])
            loss = train(input_tensor, target_tensor, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         device, max_length)
            print_loss_total += loss
            plot_loss_total += loss
            # step > 0 guards: at step 0 the progress fraction is zero — the
            # conventional timeSince helper divides by it (presumably a crash;
            # verify against its definition) — and the running totals cover a
            # single sample, so averaging over print_every/plot_every is wrong.
            if step % print_every == 0 and step > 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                             step, step / n_iters * 100,
                                             print_loss_avg))
            if step % plot_every == 0 and step > 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
                # Checkpoint whole models and optimizer states at plot intervals.
                torch.save(encoder, encoder_path)
                torch.save(decoder, decoder_path)
                torch.save(encoder_optimizer.state_dict(),
                           path + "encoder_optimizer.pt")
                torch.save(decoder_optimizer.state_dict(),
                           path + "decoder_optimizer.pt")
    showPlot(plot_losses)