def test_dropout_WITH_PROB_ZERO(self):
    """With dropout_p=0 the encoder must be deterministic.

    Two forward passes over the same input must produce identical
    output tensors.
    """
    encoder = EncoderRNN(self.vocab_size, 50, 16, dropout_p=0)
    # Randomize the weights so equality is not trivially true.
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)
    first, _ = encoder(self.input_var, self.lengths)
    second, _ = encoder(self.input_var, self.lengths)
    self.assertTrue(torch.equal(first.data, second.data))
def test_dropout_WITH_PROB_ZERO(self):
    """Two forward passes with dropout disabled should produce equal output.

    NOTE(review): this method has the same name as another
    test_dropout_WITH_PROB_ZERO in this file; if both end up in the same
    TestCase, the later definition silently shadows the earlier one — confirm.
    """
    rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0)
    # Randomize parameters so the equality check below is meaningful.
    for param in rnn.parameters():
        param.data.uniform_(-1, 1)
    # Variable-length token-id sequences passed straight to the encoder.
    batch = [[1, 2, 3], [1, 2], [1]]
    output1, _ = rnn(batch)
    output2, _ = rnn(batch)
    # NOTE(review): if the outputs are torch.Tensors, assertEqual raises
    # "Boolean value of Tensor is ambiguous"; the tensor-safe form is
    # self.assertTrue(torch.equal(output1, output2)) — confirm output type.
    self.assertEqual(output1, output2)
def loadModel(hidden_size=hidden_size, encoder_n_layers=encoder_n_layers, decoder_n_layers=decoder_n_layers, dropout=dropout, attn_model=attn_model, learning_rate=learning_rate, decoder_learning_ratio=decoder_learning_ratio, directory=SAVE_PATH):
    """Rebuild encoder/decoder models and their optimizers from the latest checkpoint.

    All defaults are captured from module-level globals at definition time
    (hidden_size, encoder_n_layers, ..., SAVE_PATH).

    Returns:
        (episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc)
        where `episode` is the stored iteration count and `voc` is the
        vocabulary restored from the checkpoint.
    """
    # Load the most recent checkpoint dict from `directory`.
    state_dict = load_latest_state_dict(directory)
    episode = state_dict['iteration']
    encoder_sd = state_dict['en']
    decoder_sd = state_dict['de']
    encoder_optimizer_sd = state_dict['en_opt']
    decoder_optimizer_sd = state_dict['de_opt']
    embedding_sd = state_dict['embedding']
    # The Voc name is immediately overwritten by the checkpointed __dict__.
    voc = Voc('placeholder_name')
    voc.__dict__ = state_dict['voc_dict']
    print('Building encoder and decoder ...')
    # Initialize word embeddings and restore their weights.
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)
    embedding.to(device)
    # Initialize encoder & decoder models, then restore their weights.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')
    # Initialize optimizers; the decoder learns faster by decoder_learning_ratio.
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    # Restore optimizer state AFTER construction so param groups line up.
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)
    # NOTE(review): this compares `device` to the string 'cuda'; if `device`
    # is a torch.device object the branch may never fire — confirm how the
    # module-level `device` is defined.
    if device == 'cuda':
        # Move restored optimizer state tensors onto the GPU, since
        # load_state_dict leaves them on the device they were saved from.
        for state in encoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
        for state in decoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
    print('Optimizers built and ready to go!')
    return episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc
def test_input_dropout_WITH_NON_ZERO_PROB(self):
    """With input_dropout_p=0.5, repeated forward passes should diverge.

    Tries up to 50 pairs of passes and fails only if every single pair
    produced identical outputs (i.e. dropout had no visible effect).
    """
    encoder = EncoderRNN(self.vocab_size, 50, 16, input_dropout_p=0.5)
    # Randomize the weights so outputs are non-trivial.
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)

    def passes_differ():
        # One pair of forward passes over the same batch.
        a, _ = encoder(self.input_var, self.lengths)
        b, _ = encoder(self.input_var, self.lengths)
        return not torch.equal(a.data, b.data)

    # any() short-circuits on the first divergence, like the break did.
    self.assertTrue(any(passes_differ() for _ in range(50)))
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With dropout_p=0.5, at least one of 50 repeated pass-pairs should differ.

    NOTE(review): this method has the same name as another
    test_dropout_WITH_NON_ZERO_PROB in this file; if both end up in the same
    TestCase, the later definition silently shadows the earlier one — confirm.
    """
    rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0.5)
    # Randomize parameters so outputs are non-trivial.
    for param in rnn.parameters():
        param.data.uniform_(-1, 1)
    # Variable-length token-id sequences passed straight to the encoder.
    batch = [[1, 2, 3], [1, 2], [1]]
    equal = True
    for _ in range(50):
        output1, _ = rnn(batch)
        output2, _ = rnn(batch)
        # NOTE(review): if the outputs are torch.Tensors, `output1 != output2`
        # yields an element-wise tensor and this `if` raises "Boolean value of
        # Tensor is ambiguous"; the tensor-safe form is
        # `if not torch.equal(output1, output2)` — confirm output type.
        if output1 != output2:
            equal = False
            break
    self.assertFalse(equal)
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With dropout_p=0.5 and a stacked RNN, repeated passes should diverge.

    n_layers=2 is essential: per the PyTorch docs, the RNN `dropout`
    argument only applies between layers, so a single-layer RNN would
    ignore it entirely.
    """
    encoder = EncoderRNN(self.vocab_size, 50, 16, n_layers=2, dropout_p=0.5)
    # Randomize the weights so outputs are non-trivial.
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)

    def passes_differ():
        # One pair of forward passes over the same batch.
        a, _ = encoder(self.input_var, self.lengths)
        b, _ = encoder(self.input_var, self.lengths)
        return not torch.equal(a.data, b.data)

    # Fail only if all 50 pairs matched exactly (dropout inactive);
    # any() short-circuits on the first divergence, like the break did.
    self.assertTrue(any(passes_differ() for _ in range(50)))
def train():
    """End-to-end training entry point for the Alexa chatbot model.

    Builds the dataset/vocabulary, constructs (or restores) the encoder,
    attention decoder, embeddings and optimizers, then hands everything
    to trainIters(). Relies on module-level globals: BASE_DIR, device,
    AlexaDataset, Voc, batch2TrainData, EncoderRNN, LuongAttnDecoderRNN,
    trainIters, nn, optim, torch, random.
    """
    # --- Dead code from the original Cornell movie-dialogs pipeline, kept
    # --- for reference when re-enabling that corpus.
    # corpus_name = "cornell movie-dialogs corpus"
    # corpus = os.path.join(BASE_DIR, "data", corpus_name)
    # # Define path to new file
    # datafile = os.path.join(corpus, "formatted_movie_lines.txt")
    #
    # delimiter = '\t'
    # # Unescape the delimiter
    # delimiter = str(codecs.decode(delimiter, "unicode_escape"))
    #
    # # Initialize lines dict, conversations list, and field ids
    # MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
    # MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]
    #
    # # Load lines and process conversations
    # print("\nProcessing corpus...")
    # lines = loadLines(os.path.join(corpus, "movie_lines.txt"), MOVIE_LINES_FIELDS)
    # print("\nLoading conversations...")
    # conversations = loadConversations(os.path.join(corpus, "movie_conversations.txt"),
    #                                   lines, MOVIE_CONVERSATIONS_FIELDS)
    #
    # # Write new csv file
    # print("\nWriting newly formatted file...")
    # with open(datafile, 'w', encoding='utf-8') as outputfile:
    #     writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    #     for pair in extractSentencePairs(conversations):
    #         writer.writerow(pair)
    #
    # # Print a sample of lines
    # print("\nSample lines from file:")
    # printLines(datafile)
    #
    # # Load/Assemble voc and pairs
    # save_dir = os.path.join("data", "save")
    # voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
    # # Print some pairs to validate
    # print("\npairs:")
    # for pair in pairs[:10]:
    #     print(pair)
    #
    # MIN_COUNT = 3  # Minimum word count threshold for trimming
    #
    # # Trim voc and pairs
    # pairs = trimRareWords(voc, pairs, MIN_COUNT)

    # save_dir = os.path.join(BASE_DIR, "data", "amazon", "models")
    save_dir = os.path.join(BASE_DIR, "data", "save")
    corpus_name = "Alexa"
    # voc, pairs = loadAlexaData()
    # _, pairs = loadAlexaData()
    # Build training pairs and vocabulary from the Alexa dataset.
    dataset = AlexaDataset()
    pairs = dataset.data
    voc = Voc.from_dataset(dataset)
    # train_data = AlexaDataset('train.json')

    # Example for validation: batch a few random pairs and print the
    # resulting tensors as a sanity check before training starts.
    small_batch_size = 5
    batches = batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
    input_variable, lengths, target_variable, mask, max_target_len = batches
    print("input_variable:", input_variable)
    print("lengths:", lengths)
    print("target_variable:", target_variable)
    print("mask:", mask)
    print("max_target_len:", max_target_len)

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'
    #attn_model = 'general'
    #attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    loadFilename = None
    # NOTE(review): checkpoint_iter is only used by the commented-out
    # loadFilename line below; it is dead while loadFilename stays None.
    checkpoint_iter = 4000
    #loadFilename = os.path.join(save_dir, model_name, corpus_name,
    #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                            '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    if loadFilename:
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']
    else:
        checkpoint = None

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # If you have cuda, configure cuda to call
    # NOTE(review): these loops run unconditionally. They are no-ops when
    # starting fresh (Adam's state is empty until the first step), but if a
    # checkpoint was loaded on a CPU-only machine, v.cuda() will raise —
    # consider guarding on cuda availability as loadModel() does.
    for state in encoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
    for state in decoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, print_every, save_every, clip, corpus_name, loadFilename, checkpoint, hidden_size)