def main():
    """Smoke-test the humorous branch of the FactoredLSTM decoder.

    Loads the vocabulary, builds small (batch size 3) data loaders, pushes a
    single styled batch through the decoder in "humorous" mode, prints the
    masked cross-entropy loss, and stops.  Intended for manual debugging only.
    """
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)

    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"

    # The factual loader mirrors the real training setup but is not consumed
    # below; only the styled loader is iterated.
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)

    # Size the encoder from the actual vocabulary instead of the previous
    # hard-coded constant (60376) so it stays consistent with the decoder.
    # The encoder is not run in this loop (features=None), so outputs are
    # unchanged.
    encoder = EncoderRNN(voc_size=len(vocab), emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    for i, (captions, lengths) in enumerate(styled_data_loader):
        captions = Variable(captions.long())
        if torch.cuda.is_available():
            captions = captions.cuda()

        # Language-only pass: no image features in "humorous" mode.
        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])
        # Targets are the captions shifted left by one token, so the valid
        # lengths shrink by one to match.
        loss = masked_cross_entropy(outputs,
                                    captions[:, 1:].contiguous(),
                                    lengths - 1)
        print(loss)
        break
# NOTE(review): this chunk begins mid-expression — the trailing arguments of a
# decoder constructor call whose opening line is outside this view.
args.hidden_size,
len(dataset.out_vocab[0]),  # output vocabulary size — presumably target side; confirm upstream
args.n_layers,
args.dropout,
)

# Initialize optimizers and criterion
# encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
# decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate * decoder_learning_ratio)
# Adadelta adapts per-parameter step sizes, so no explicit lr is passed —
# presumably why the Adam lines above were abandoned; confirm with the author.
encoder_optimizer = optim.Adadelta(encoder.parameters())
decoder_optimizer = optim.Adadelta(decoder.parameters())
criterion = nn.CrossEntropyLoss()

# Move models to GPU
if args.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# The training call below is disabled; only the setup above (and whatever
# follows this chunk) executes.
# train(dataset,
#       args.batch_size,
#       args.n_epochs,
#       encoder,
#       decoder,
#       encoder_optimizer,
#       decoder_optimizer,
#       criterion,
#       'checkpoints/pov',
#       lang)

# evaluate
# NOTE(review): this chunk begins inside a checkpoint-loading if/else whose
# opening conditions are outside this view, and ends mid-expression.
        imdb_decoder = torch.load(sys.argv[2])
    else:
        # No GPU available: remap CUDA tensors onto the CPU while unpickling.
        imdb_encoder = torch.load(sys.argv[1], map_location={'cuda:0': 'cpu'})
        imdb_decoder = torch.load(sys.argv[2], map_location={'cuda:0': 'cpu'})
else:
    # No checkpoint arguments: build fresh models instead.
    imdb_encoder = EncoderRNN(input_lang.n_words, hidden_size, embedding_matrix)
    imdb_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, 1, dropout_p=0.1)

if use_cuda:
    imdb_encoder = imdb_encoder.cuda()
    imdb_decoder = imdb_decoder.cuda()

trainIters(imdb_encoder, imdb_decoder, 1500000, print_every=100, plot_every=100,
           learning_rate=0.01)

# save model
# NOTE(review): print_loss_avg and maximum_norm are not defined anywhere in
# this view — presumably globals set inside trainIters; confirm, otherwise
# these saves raise NameError after a 1.5M-iteration run.
torch.save(
    imdb_encoder,
    'trained_model/encoder_imdb100000_max16_glove_' + str(print_loss_avg) + '_' +
    str(maximum_norm))
torch.save(
    imdb_decoder,
    'trained_model/decoder_imdb100000_max16_glove_' +
def main(args):
    """Alternating two-task training loop for StyleNet.

    Each epoch first trains encoder+decoder on (image message, factual
    caption) pairs, then trains only the decoder's style-factor matrices on
    a humorous text-only corpus.  State dicts for both models are written to
    ``args.model_path`` after every epoch.
    """
    # Guarding set_device keeps CPU-only runs from crashing; the hard-coded
    # device index 6 is preserved from the original configuration.
    if torch.cuda.is_available():
        torch.cuda.set_device(6)

    model_path = args.model_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    img_path = args.img_path
    factual_cap_path = args.factual_caption_path
    humorous_cap_path = args.humorous_caption_path

    # data loaders: paired (image, caption) data and style-only text data
    data_loader = get_data_loader(img_path, factual_cap_path, vocab,
                                  args.caption_batch_size)
    styled_data_loader = get_styled_data_loader(humorous_cap_path, vocab,
                                                args.language_batch_size)

    # models
    emb_dim = args.emb_dim
    hidden_dim = args.hidden_dim
    factored_dim = args.factored_dim
    vocab_size = len(vocab)
    encoder = EncoderRNN(voc_size=vocab_size, emb_size=emb_dim,
                         hidden_size=emb_dim)
    decoder = FactoredLSTM(emb_dim, hidden_dim, factored_dim, vocab_size)

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # loss and optimizers: the caption objective updates all parameters, the
    # language objective updates only the style-specific S_* factor matrices.
    criterion = masked_cross_entropy
    cap_params = list(decoder.parameters()) + list(encoder.parameters())
    lang_params = list(decoder.S_hc.parameters()) + list(decoder.S_hf.parameters()) \
        + list(decoder.S_hi.parameters()) + list(decoder.S_ho.parameters())
    optimizer_cap = torch.optim.Adam(cap_params, lr=args.lr_caption)
    optimizer_lang = torch.optim.Adam(lang_params, lr=args.lr_language)

    # train
    total_cap_step = len(data_loader)
    total_lang_step = len(styled_data_loader)
    epoch_num = args.epoch_num
    for epoch in range(epoch_num):
        # ---- caption task (factual mode) ----
        for i, (messages, m_lengths, targets, t_lengths) in enumerate(data_loader):
            messages = to_var(messages.long())
            targets = to_var(targets.long())

            # forward, backward and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            output, features = encoder(messages, list(m_lengths))
            outputs = decoder(targets, features, mode="factual")

            # Shift by one token: predict targets[t] from targets[:t], so
            # lengths shrink by one as well.
            loss = criterion(outputs[:, 1:, :].contiguous(),
                             targets[:, 1:].contiguous(),
                             t_lengths - 1)
            loss.backward()
            optimizer_cap.step()

            # print log
            if i % args.log_step_caption == 0:
                # loss.item() replaces the deprecated loss.data[0], which
                # raises an error on PyTorch >= 0.5.
                print("Epoch [%d/%d], CAP, Step [%d/%d], Loss: %.4f"
                      % (epoch + 1, epoch_num, i, total_cap_step, loss.item()))
                eval_outputs(outputs, vocab)

        # ---- language task (humorous mode, no image features) ----
        for i, (captions, lengths) in enumerate(styled_data_loader):
            captions = to_var(captions.long())

            # forward, backward and optimize
            decoder.zero_grad()
            outputs = decoder(captions, mode='humorous')
            # NOTE(review): unlike the caption branch, outputs is used
            # unshifted here — presumably the decoder drops the first step
            # itself in 'humorous' mode; confirm against FactoredLSTM.
            loss = criterion(outputs, captions[:, 1:].contiguous(),
                             lengths - 1)
            loss.backward()
            optimizer_lang.step()

            # print log
            if i % args.log_step_language == 0:
                print("Epoch [%d/%d], LANG, Step [%d/%d], Loss: %.4f"
                      % (epoch + 1, epoch_num, i, total_lang_step, loss.item()))

        # checkpoint both models after every epoch
        torch.save(decoder.state_dict(),
                   os.path.join(model_path, 'decoder-%d.pkl' % (epoch + 1, )))
        torch.save(encoder.state_dict(),
                   os.path.join(model_path, 'encoder-%d.pkl' % (epoch + 1, )))
# NOTE(review): this chunk begins inside an evaluate() helper defined outside
# this view; only its final two statements are visible.
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    return decoded_words  # , decoder_attentions[:di + 1]


def evaluateRandomly(encoder, decoder, n=10):
    """Print n random translation samples: source (>), reference (=), model output (<)."""
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


# ---- script entry: build data and models, train, then sample outputs ----
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
# Tensor of all output-vocabulary indices; its purpose is not visible in this
# chunk — presumably consumed by trainIters or the decoder; confirm.
noise = torch.Tensor(list(range(output_lang.n_words)))
print(random.choice(pairs))

hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainIters(encoder1, decoder1, 25000, print_every=50)
evaluateRandomly(encoder1, decoder1, 20)
def train(x, y, optimizer=optim.Adam, criterion=None, n_steps=100,
          attn_model="general", hidden_size=128, n_layers=1, dropout=0,
          batch_size=50, elr=0.001, dlr=0.005, clip=50.0, print_every=10,
          teacher_forcing_ratio=lambda x: 1 if x < 10 else 0):
    """Train an encoder / Luong-attention decoder pair on batched series.

    Args:
        x, y: input/target arrays indexed as ``arr[:, column]``; columns are
            sampled as mini-batches.  # assumes (seq_len, n_series) layout — confirm with caller
        optimizer: optimizer class applied to both models.
        criterion: loss module; ``None`` (the default) builds a fresh
            ``nn.MSELoss()`` per call.  The previous ``criterion=nn.MSELoss()``
            default created one shared instance at definition time — the
            mutable-default-argument pitfall.
        n_steps: number of random-batch training steps (not epochs).
        elr, dlr: encoder / decoder learning rates.
        clip: gradient-clipping norm forwarded to ``_train``.
        teacher_forcing_ratio: callable mapping step -> forcing ratio.

    Returns:
        (encoder, decoder): the trained models.
    """
    # Build the default criterion per call instead of sharing one instance
    # across every invocation of train().
    if criterion is None:
        criterion = nn.MSELoss()

    # Configure training/optimization
    encoder_learning_rate = elr
    decoder_learning_rate = dlr  # renamed from *_ratio: it is a rate, not a ratio

    # Initialize models (feature size 1 — univariate series per column)
    encoder = EncoderRNN(1, hidden_size, n_layers, dropout=dropout)
    decoder = LuongAttnDecoderRNN(attn_model, 1, hidden_size, n_layers,
                                  dropout=dropout)

    # Initialize optimizers
    encoder_optimizer = optimizer(encoder.parameters(), lr=encoder_learning_rate)
    decoder_optimizer = optimizer(decoder.parameters(), lr=decoder_learning_rate)

    # Move models to GPU
    if USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # Begin!
    print_loss_total = 0
    step = 0
    while step < n_steps:
        step += 1

        # Sample a random batch of series columns for this step.
        batch_idx = np.random.randint(0, x.shape[1], batch_size)
        input_batches, target_batches = x[:, batch_idx], y[:, batch_idx]

        # Run one optimization step; teacher forcing decays with the step.
        loss, _ = _train(input_batches, target_batches, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         teacher_forcing_ratio=teacher_forcing_ratio(step),
                         clip=clip)

        # Keep track of loss and report a running average.
        print_loss_total += loss
        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '(%d %d%%) %.4f' % (step, step / n_steps * 100,
                                                print_loss_avg)
            print(print_summary)

    return encoder, decoder