def load(file_path, dataset):
    """Load a seq2seq checkpoint and build an evaluation policy + environment.

    Parameters
    ----------
    file_path : str
        Path to a ``.tar`` checkpoint containing 'en', 'de', 'embedding'
        and 'voc_dict' entries (as saved by the training loops in this project).
    dataset : object
        Dataset handed to ``Env`` (e.g. an AlexaDataset instance).

    Returns
    -------
    (policy, env) : RLGreedySearchDecoder in eval mode wrapping the restored
        encoder/decoder, and an ``Env`` built from ``voc`` and ``dataset``.
    """
    checkpoint = torch.load(file_path, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']
    voc = Voc(checkpoint['voc_dict']['name'])
    voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings.
    # NOTE: hidden_size / *_n_layers / dropout / attn_model come from module
    # globals — they must match the checkpointed architecture.
    embedding = nn.Embedding(voc.num_words, hidden_size)
    # BUG FIX: the original gated all three load_state_dict calls on a
    # module-level `loadFilename`, but the checkpoint is always read from
    # `file_path` above — the saved weights must always be applied, otherwise
    # this returns randomly initialized models.
    embedding.load_state_dict(embedding_sd)

    # Initialize encoder & decoder models and restore their weights.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Inference only: put dropout layers in eval mode.
    encoder.eval()
    decoder.eval()
    policy = RLGreedySearchDecoder(encoder, decoder, voc)
    env = Env(voc, dataset)
    return policy, env
def train():
    """Train the Adversarial_Discriminator against the latest saved seq2seq model.

    Loads the most recent seq2seq checkpoint, freezes it behind an
    RLGreedySearchDecoder, then alternates evaluation and training of the
    discriminator for N_EPOCHS, checkpointing after every epoch.
    Relies on module globals: device, learning_rate, batch_size,
    SAVE_PATH_SEQ2SEQ and the model / dataset classes.
    """
    N_EPOCHS = 5
    output_size = 1
    save_dir = 'data/save/Adversarial_Discriminator/'
    attn_model = 'dot'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    # Restore vocabulary from the latest seq2seq checkpoint.
    seq2seqModel = load_latest_state_dict(savepath=SAVE_PATH_SEQ2SEQ)
    voc = Voc('name')
    voc.__dict__ = seq2seqModel['voc_dict']

    # Discriminator shares an embedding sized to the restored vocabulary.
    embedding = nn.Embedding(voc.num_words, hidden_size)
    model = Adversarial_Discriminator(hidden_size, output_size, embedding)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()

    # Rebuild the (frozen) seq2seq generator from the checkpoint.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(seq2seqModel['en'])
    decoder.load_state_dict(seq2seqModel['de'])
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    searcher = RLGreedySearchDecoder(encoder, decoder, voc)

    train_data = AlexaDataset('train.json', rare_word_threshold=3)  # sorry cornell
    train_data.trimPairsToVocab(voc)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = AlexaDataset('test_freq.json', rare_word_threshold=3)
    test_data.trimPairsToVocab(voc)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    # BUG FIX: torch.save below raises FileNotFoundError if the checkpoint
    # directory does not exist yet.
    os.makedirs(save_dir, exist_ok=True)

    for epoch in range(1, N_EPOCHS + 1):
        test_AdversarialDiscriminatorOnLatestSeq2Seq(model, searcher,
                                                     test_loader, voc)
        loss = trainAdversarialDiscriminatorOnLatestSeq2Seq(
            model, searcher, voc, train_loader, criterion, optimizer,
            embedding, save_dir, epoch)
        # Checkpoint every epoch (the original `epoch % 1 == 0` was always true).
        torch.save(
            {
                'iteration': epoch,
                'model': model.state_dict(),
                'opt': optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(save_dir, '{}_{}.tar'.format(epoch, 'epochs')))
def loadModel(hidden_size=hidden_size, encoder_n_layers=encoder_n_layers,
              decoder_n_layers=decoder_n_layers, dropout=dropout,
              attn_model=attn_model, learning_rate=learning_rate,
              decoder_learning_ratio=decoder_learning_ratio,
              directory=SAVE_PATH):
    """Restore the latest seq2seq training state from `directory`.

    Rebuilds the embedding, encoder, decoder and both Adam optimizers from
    the most recent checkpoint so training can resume where it left off.
    Defaults are bound from module globals at import time.

    Returns
    -------
    (episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc)
    """
    state_dict = load_latest_state_dict(directory)
    episode = state_dict['iteration']
    encoder_sd = state_dict['en']
    decoder_sd = state_dict['de']
    encoder_optimizer_sd = state_dict['en_opt']
    decoder_optimizer_sd = state_dict['de_opt']
    embedding_sd = state_dict['embedding']
    voc = Voc('placeholder_name')  # name is overwritten by the saved dict
    voc.__dict__ = state_dict['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)
    embedding.to(device)

    # Initialize encoder & decoder models and restore their weights.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # BUG FIX: the original used `if device == 'cuda'` (false for a
    # torch.device object on older PyTorch) followed by `.cuda()` calls.
    # `.to(device)` is a no-op when the tensor is already on `device`, so it
    # is safe unconditionally and works for both CPU and GPU targets.
    for opt in (encoder_optimizer, decoder_optimizer):
        for state in opt.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
    print('Optimizers built and ready to go!')

    return (episode, encoder, decoder, encoder_optimizer, decoder_optimizer,
            voc)
def train():
    """Train the seq2seq chatbot (encoder/decoder) on the Alexa dataset.

    Builds vocabulary and pairs from AlexaDataset, prints one example batch
    as a sanity check, constructs the models (optionally resuming from a
    checkpoint when `loadFilename` is set), then runs `trainIters`.
    (The dead, commented-out Cornell movie-corpus preprocessing that used to
    live at the top of this function has been removed.)
    """
    save_dir = os.path.join(BASE_DIR, "data", "save")
    corpus_name = "Alexa"

    dataset = AlexaDataset()
    pairs = dataset.data
    voc = Voc.from_dataset(dataset)

    # Example batch, printed for validation only.
    small_batch_size = 5
    batches = batch2TrainData(
        voc, [random.choice(pairs) for _ in range(small_batch_size)])
    input_variable, lengths, target_variable, mask, max_target_len = batches
    print("input_variable:", input_variable)
    print("lengths:", lengths)
    print("target_variable:", target_variable)
    print("mask:", mask)
    print("max_target_len:", max_target_len)

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'  # alternatives: 'general', 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch.
    loadFilename = None
    checkpoint_iter = 4000
    #loadFilename = os.path.join(save_dir, model_name, corpus_name,
    #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                            '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    if loadFilename:
        # FIX: map_location=device handles both same-machine restores and
        # loading a GPU-trained checkpoint onto a CPU-only machine (the
        # original bare torch.load would fail in the latter case).
        checkpoint = torch.load(loadFilename, map_location=device)
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']
    else:
        checkpoint = None

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # BUG FIX: the original unconditionally called `.cuda()` on every tensor
    # in the optimizer state, which crashes on CPU-only machines.
    # `.to(device)` is a no-op when already on the right device.
    for opt in (encoder_optimizer, decoder_optimizer):
        for state in opt.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, encoder_n_layers,
               decoder_n_layers, save_dir, n_iteration, batch_size,
               print_every, save_every, clip, corpus_name, loadFilename,
               checkpoint, hidden_size)