def example_test():
    encoder_test = EncoderRNN(10, 10, 2, max_length=3)
    decoder_test = AttnDecoderRNN('general', 10, 10, 2)
    print(encoder_test)
    print(decoder_test)

    encoder_hidden = encoder_test.init_hidden(batch_size=4)
    # word_input = Variable(torch.LongTensor([[1, 2, 3]]))
    word_input = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))

    if USE_CUDA:
        encoder_test.cuda()
        word_input = word_input.cuda()
        encoder_hidden = encoder_hidden.cuda()

    encoder_outputs, encoder_hidden = encoder_test(word_input, encoder_hidden)
    # encoder_outputs: (S, B, H); encoder_hidden: (L, B, H)
    print(encoder_outputs.shape, encoder_hidden.shape)

    # word_inputs = Variable(torch.LongTensor([[1, 2, 3]]))
    word_inputs = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    decoder_attns = torch.zeros(4, 3, 3)
    decoder_hidden = encoder_hidden
    decoder_context = Variable(torch.zeros(4, decoder_test.hidden_size))

    if USE_CUDA:
        decoder_test.cuda()
        word_inputs = word_inputs.cuda()
        decoder_context = decoder_context.cuda()

    for i in range(3):
        decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
            word_inputs[:, i], decoder_context, decoder_hidden, encoder_outputs)
        print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size())
        decoder_attns[:, i, :] = decoder_attn.squeeze(1).cpu().data
pair = random.choice(pairs)
print('>', pair[0])
print('=', pair[1])
output_words, attentions = evaluate(encoder, decoder, pair[0])
output_sentence = ' '.join(output_words)
print('<', output_sentence)
print('')


hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

######################################################################
#

evaluateRandomly(encoder1, attn_decoder1)

output_words, attentions = evaluate(encoder1, attn_decoder1, "je suis trop froid .")
plt.matshow(attentions.numpy())


def showAttention(input_sentence, output_words, attentions):
    # Set up figure with colorbar
def eval_network(fn_in_model):
    # Input
    #   fn_in_model : filename of saved model
    #
    # Create filename for output
    fn_out_res = fn_in_model
    fn_out_res = fn_out_res.replace('.tar', '.txt')
    fn_out_res_test = fn_out_res.replace('/net_', '/res_test_')

    # Load and evaluate the network in filename 'fn_in_model'
    assert os.path.isfile(fn_in_model)
    print(' Checkpoint found...')
    print(' Processing model: ' + fn_in_model)
    print(' Writing to file: ' + fn_out_res_test)
    checkpoint = torch.load(fn_in_model, map_location='cpu')  # evaluate model on CPU
    input_lang = checkpoint['input_lang']
    output_lang = checkpoint['output_lang']
    emb_size = checkpoint['emb_size']
    nlayers = checkpoint['nlayers']
    dropout_p = checkpoint['dropout']
    input_size = input_lang.n_symbols
    output_size = output_lang.n_symbols
    samples_val = checkpoint['episodes_validation']
    disable_memory = checkpoint['disable_memory']
    max_length_eval = checkpoint['max_length_eval']

    if 'args' not in checkpoint or 'disable_attention' not in checkpoint['args']:
        use_attention = True
    else:
        args = checkpoint['args']
        use_attention = not args.disable_attention

    if disable_memory:
        encoder = WrapperEncoderRNN(emb_size, input_size, output_size, nlayers, dropout_p)
    else:
        encoder = MetaNetRNN(emb_size, input_size, output_size, nlayers, dropout_p)
    if use_attention:
        decoder = AttnDecoderRNN(emb_size, output_size, nlayers, dropout_p)
    else:
        decoder = DecoderRNN(emb_size, output_size, nlayers, dropout_p)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    decoder.load_state_dict(checkpoint['decoder_state_dict'])

    with open(fn_out_res_test, 'w') as f_test:
        with redirect_stdout(f_test):
            if 'episode' in checkpoint:
                print(' Loading epoch ' + str(checkpoint['episode']) +
                      ' of ' + str(checkpoint['num_episodes']))
            describe_model(encoder)
            describe_model(decoder)
            if eval_type == 'val':
                print('Evaluating VALIDATION performance on pre-generated validation set')
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val, encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
                print('Acc Retrieval (val): ' + str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'addprim_jump':
                print('Evaluating TEST performance on SCAN addprim_jump')
                print(' ...support set is just the isolated primitives')
                mybatch = scan_evaluation_prim_only('addprim_jump', 'test',
                                                    input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch], encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
            elif eval_type == 'length':
                print('Evaluating TEST performance on SCAN length')
                print(' ...over multiple support sets as contributed by the pre-generated validation set')
                samples_val = scan_evaluation_val_support('length', 'test',
                                                          input_lang, output_lang, samples_val)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val, encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
                print('Acc Retrieval (val): ' + str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'template_around_right':
                print('Evaluating TEST performance on the SCAN around right')
                print(' ...with just direction mappings as support set')
                mybatch = scan_evaluation_dir_only('template_around_right', 'test',
                                                   input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch], encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
            else:
                assert False
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS, save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE, use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate,
    #                                   lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE, chinese.n_words,
                         config.NUM_LAYER, dropout_p=config.DROPOUT)

if config.RESTORE:
    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

# Move models to GPU
if config.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.LR)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=config.LR)
criterion = LanguageModelCriterion()  # nn.NLLLoss(ignore_index=0)

# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0
plot_loss_total = 0

for epoch in range(1, config.NUM_ITER + 1):
    # Get training data for this cycle
# Load pairs.pkl and lang.pkl
with open(args.data_path + "/pairs.pkl", 'rb') as f:
    (train_pairs, test_pairs) = pkl.load(f)
with open(args.data_path + "/lang.pkl", 'rb') as f:
    lang_tuple = pkl.load(f)
lang = Lang(lang_tuple)

# Prepare dataloader for training
train_dataiter = DataIter(train_pairs, lang, args.vocab_size,
                          args.batch_size, args.cuda)

# Set encoder and decoder
encoder = Encoder(args.vocab_size, args.hidden_size)
decoder = AttnDecoderRNN(args.attn, args.hidden_size, args.vocab_size,
                         args.n_layers, args.dropout, args.cuda)
if args.cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

# Set optimizer and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
encoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=encoder_optimizer, mode='min', factor=0.1, patience=5,
    verbose=True, min_lr=0.00001)
# Decoder scheduler mirrors the encoder scheduler settings
decoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=decoder_optimizer, mode='min', factor=0.1, patience=5,
    verbose=True, min_lr=0.00001)
class Train(object):
    """ """
    # TODO: remove the dataset from the Train constructor; an apply method should live
    # TODO: in this class, accepting a dataset parameter and training on that dataset.

    def __init__(self, config, dataset):
        self.config = config
        self.n_epochs = config.n_epochs
        self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
        self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words, config=config)
        self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                                   lr=config.learning_rate)
        self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                                   lr=config.learning_rate)
        self.criterion = nn.NLLLoss()
        self.is_plot = config.is_plot
        self.clip_value = config.clip_value
        self.losses = []
        if self.config.USE_CUDA:
            self.encoder.cuda(self.config.gpu_id)
        if self.config.USE_CUDA:
            self.decoder.cuda(device_id=self.config.gpu_id)

    def train(self, dataset):
        if self.is_plot:
            fig, ax = plt.subplots()
            grid(True)
            plt.ion()
        for epoch in range(self.n_epochs):
            training_pair = dataset.get_sample_var()
            loss, result_output = self.step(training_pair)
            print("At Epoch : {:5}, Get loss : {:10}\n".format(epoch, loss))
            self.losses.append(loss)
            if self.is_plot:
                ax.plot(range(epoch + 1), self.losses, "b")
                plt.pause(0.0001)
                plt.show()
            if epoch % 100 == 0:
                print(''.join([dataset.target.index2word[i]
                               for i in training_pair[1].squeeze(1).data.tolist()]))
                print(''.join([dataset.target.index2word[i] for i in result_output]))

    def step(self, training_pair):
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_variable = training_pair[0]
        target_variable = training_pair[1]
        loss = 0
        input_length = input_variable.size()[0]
        target_length = target_variable.size()[0]

        encoder_hidden = self.encoder.init_hidden()
        encoder_outputs, encoder_hidden = self.encoder(input_variable, encoder_hidden)

        decoder_input = Variable(torch.LongTensor([[self.config.SOS_token]]))
        decoder_context = Variable(torch.zeros(1, self.decoder.hidden_dim))
        decoder_hidden = encoder_hidden
        if self.config.USE_CUDA:
            decoder_input = decoder_input.cuda(device_id=self.config.gpu_id)
            decoder_context = decoder_context.cuda(device_id=self.config.gpu_id)
            assert type(decoder_input.data) == torch.cuda.LongTensor
            assert type(decoder_context.data) == torch.cuda.FloatTensor

        result_output = []
        for di in range(target_length):
            decoder_output, decoder_context, decoder_hidden, decoder_attention = \
                self.decoder(decoder_input, decoder_context, decoder_hidden,
                             encoder_outputs)
            loss += self.criterion(decoder_output[0], target_variable[di])

            # Greedy decoding: feed the most likely token back in as the next input
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            if self.config.USE_CUDA:
                decoder_input = decoder_input.cuda(device_id=self.config.gpu_id)
            result_output.append(ni)
            if ni == self.config.EOS_token:
                break

        loss.backward()
        # TODO: clip value
        torch.nn.utils.clip_grad_norm(self.encoder.parameters(), self.clip_value)
        torch.nn.utils.clip_grad_norm(self.decoder.parameters(), self.clip_value)
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()

        if self.config.USE_CUDA:
            return loss.cpu().data[0] / target_length, result_output
        return loss.data[0] / target_length, result_output
def main(args):
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform,
                             args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    encoder = EncoderCNN(args.embed_size)
    decoder = AttnDecoderRNN(args.embed_size, args.hidden_size,
                             len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    params = (list(decoder.parameters()) + list(encoder.linear.parameters())
              + list(encoder.bn.parameters()))
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(data_loader)
    decoder_hidden = decoder.init_hidden()
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = cuda_variable(images, volatile=True)
            captions = cuda_variable(captions)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(captions, decoder_hidden, features, lengths)
            # outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      % (epoch, args.num_epochs, i, total_step,
                         loss.data[0], np.exp(loss.data[0])))

            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
# Word vector
word_vector = KeyedVectors.load_word2vec_format("word_vector/koran.vec", binary=True)

hidden_size = 64
max_len = 50
encoder = EncoderEmbeddingInputRNN(src_lang.n_words, hidden_size, word_vector)
attn_decoder = AttnDecoderRNN(hidden_size, tgt_lang.n_words, dropout_p=0.1,
                              max_length=max_len)

if use_cuda:
    encoder = encoder.cuda()
    attn_decoder = attn_decoder.cuda()

num_iter = 100000
trainer = Trainer(src_lang, tgt_lang, pairs)
trainer.train(encoder, attn_decoder, num_iter,
              print_every=num_iter // 100, max_len=max_len)
trainer.evaluateRandomly(encoder, attn_decoder, max_len=max_len)
# trainer.evaluateAll(encoder, attn_decoder)

encoder.saveState('model/encoder-embedding2-h64' + str(num_iter) + '.pt')
attn_decoder.saveState('model/decoder-embedding2-h64' + str(num_iter) + '.pt')

# Open testfile as test and build pairs from it
    return print_losses


# Create data loader
dataloader = DataLoader(args.data_base_dir, args.label_path, args.max_aspect_ratio,
                        args.max_encoder_l_h, args.max_encoder_l_w,
                        args.max_decoder_l)

# Create the modules of the algorithm
cnn1 = CNN()
encoder1 = EncoderBRNN(args.batch_size, args.num_layers_encoder,
                       args.hidden_dim_encoder, use_cuda)
decoder1 = AttnDecoderRNN(args.hidden_dim_encoder // 2, args.output_dim_decoder,
                          args.num_layers_decoder, args.max_length_decoder,
                          dataloader.vocab_size)

if use_cuda:
    cnn1 = cnn1.cuda()
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainIters(args.batch_size, cnn1, encoder1, decoder1, dataloader,
           args.learning_rate, n_iters=75000, print_every=10, use_cuda=use_cuda)