def test(epoch):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            data = NamedTensor(data, ("batch", "ch", "height", "width"))
            recon_batch, normal = model(data)
            test_loss += loss_function(recon_batch, data, normal).item()
            if i == 0:
                n = min(data.size("batch"), 8)
                group = [
                    data.narrow("batch", 0, n),
                    recon_batch.split(x=("ch", "height", "width"), height=28, width=28)
                    .narrow("batch", 0, n),
                ]
                comparison = ntorch.cat(group, "batch")
                save_image(
                    comparison.values.cpu(),
                    "results/reconstruction_" + str(epoch) + ".png",
                    nrow=n,
                )
    test_loss /= len(test_loader.dataset)
    print("====> Test set loss: {:.4f}".format(test_loss))
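# A minimal, standalone sketch (plain torch, no NamedTensor) of the narrow call used
# above: selecting the first n samples along the batch dimension. The shapes are
# illustrative assumptions (MNIST-like 1x28x28 images), not taken from the snippet.
import torch

batch = torch.randn(16, 1, 28, 28)      # (batch, ch, height, width)
n = min(batch.size(0), 8)
first_n = batch.narrow(0, 0, n)         # same elements as batch[:n], returned as a view
assert first_n.shape == (8, 1, 28, 28)
assert torch.equal(first_n, batch[:8])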
def batchify_sentences(data, bsz, device="cuda"):
    data = torch.cat(data, -1)
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    if "cuda" in device:
        data = data.cuda()
    return data[:-1, :], data[1:, :]
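# Sketch (toy data assumed) of what batchify_sentences above returns: an input matrix
# and a target matrix shifted forward by one row, so tgt[t, b] is the token that
# follows inp[t, b] in the same column.
import torch

sentences = [torch.arange(0, 7), torch.arange(7, 13)]     # two "sentences" of token ids
inp, tgt = batchify_sentences(sentences, bsz=4, device="cpu")
print(inp.shape, tgt.shape)   # 13 tokens with bsz=4 keep 12 -> (2, 4) and (2, 4)
print(inp)
print(tgt)                    # each column of tgt is the matching column of inp shifted by one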
def batchify(data, bsz):
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit.
    # torch.narrow(input, dim, start, length) → Tensor
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    return data
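# Quick check of the two-argument batchify defined directly above, using the
# 26-letter alphabet: 26 tokens with bsz=4 keep 24 and come out as a (6, 4) matrix
# whose columns are the contiguous runs 'a'-'f', 'g'-'l', 'm'-'r', 's'-'x'
# ('y' and 'z' are dropped by narrow). No GPU is assumed.
import torch

letters = torch.arange(ord('a'), ord('z') + 1)    # 26 token ids
cols = batchify(letters, 4)
print(cols.shape)                                 # torch.Size([6, 4])
print([chr(int(i)) for i in cols[:, 1]])          # ['g', 'h', 'i', 'j', 'k', 'l']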
def batchify(data, bsz, args):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.gpu:
        data = data.cuda()
    return data
def __iter__(self):
    # Starting from sequential data, batchify arranges the dataset into columns.
    # For instance, with the alphabet as the sequence and batch size 4, we'd get
    # ┌ a g m s ┐
    # │ b h n t │
    # │ c i o u │
    # │ d j p v │
    # │ e k q w │
    # └ f l r x ┘.
    # These columns are treated as independent by the model, which means that the
    # dependence of e.g. 'g' on 'f' cannot be learned, but allows more efficient
    # batch processing.
    #
    # def batchify(data, bsz):
    #     # Work out how cleanly we can divide the dataset into bsz parts.
    #     nbatch = data.size(0) // bsz
    #     # Trim off any extra elements that wouldn't cleanly fit (remainders).
    #     data = data.narrow(0, 0, nbatch * bsz)
    #     # Evenly divide the data across the bsz batches.
    #     data = data.view(bsz, -1).t().contiguous()
    #     return data.to(device)
    #
    # get_batch subdivides the source data into chunks of length args.bptt.
    # If source is equal to the example output of the batchify function, with
    # a bptt-limit of 2, we'd get the following two Variables for i = 0:
    # ┌ a g m s ┐ ┌ b h n t ┐
    # └ b h n t ┘ └ c i o u ┘
    # Note that despite the name of the function, the subdivision of data is not
    # done along the batch dimension (i.e. dimension 1), since that was handled
    # by the batchify function. The chunks are along dimension 0, corresponding
    # to the seq_len dimension in the LSTM.
    #
    # def get_batch(source, i):
    #     seq_len = min(args.bptt, len(source) - 1 - i)
    #     data = source[i:i+seq_len]
    #     target = source[i+1:i+1+seq_len].view(-1)
    #     return data, target
    #
    # tests:
    # data = torch.Tensor([i for i in range(ord('a'), ord('z') + 1)]).long()
    # letters = [chr(int(i)) for i in data]   # map the ids back to characters

    # each sampler returns indices; use those indices
    data = torch.LongTensor(list(self.sampler))
    nbatch = data.size(0) // self.batch_size
    data = data.narrow(0, 0, nbatch * self.batch_size)
    data = data.view(self.batch_size, -1).t()  # this is important!
    for row_as_batch in data:
        yield row_as_batch.tolist()
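# Runnable, standalone illustration of the get_batch chunking described in the comment
# above: with the batchified alphabet and a bptt limit of 2, the i = 0 chunk is the
# first two rows as input and the next two rows (shifted by one) as target. The
# helper below restates the commented get_batch without relying on args.
import torch

bptt = 2
source = torch.arange(ord('a'), ord('x') + 1).view(4, -1).t().contiguous()   # the (6, 4) layout above

def get_batch(source, i, bptt=bptt):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target

data, target = get_batch(source, 0)
print([[chr(int(c)) for c in row] for row in data])   # [['a','g','m','s'], ['b','h','n','t']]
print([chr(int(c)) for c in target])                  # ['b','h','n','t','c','i','o','u']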
def rand_write_train(args, train_loader, validation_loader):
    # define model and optimizer
    model = LSTMRandWriter(args.cell_size, args.num_clusters)
    if cuda:
        model = model.cuda()
    optimizer = optim.Adam([{'params': model.parameters()}], lr=args.learning_rate)

    # initialize null hidden states and memory states
    init_states = [torch.zeros((1, args.batch_size, args.cell_size))] * 4
    if cuda:
        init_states = [state.cuda() for state in init_states]
    init_states = [Variable(state, requires_grad=False) for state in init_states]
    h1_init, c1_init, h2_init, c2_init = init_states

    t_loss = []
    v_loss = []
    best_validation_loss = 1E10

    # update training time
    start_time = time.time()
    for epoch in range(args.num_epochs):
        train_loss = 0
        for batch_idx, (data, masks, onehots, text_lens) in enumerate(train_loader):
            # gather training batch
            step_back = data.narrow(1, 0, args.timesteps)
            x = Variable(step_back, requires_grad=False)
            masks = Variable(masks, requires_grad=False)
            masks = masks.narrow(1, 0, args.timesteps)

            optimizer.zero_grad()
            # feed forward
            outputs = model(x, (h1_init, c1_init), (h2_init, c2_init))
            end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, prev, prev2 = outputs

            # supervision
            data = data.narrow(1, 1, args.timesteps)
            y = Variable(data, requires_grad=False)
            loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                                   log_sigma_2, rho, y, masks) / torch.sum(masks)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

        # update training performance
        print('====> Epoch: {} Average train loss: {:.4f}'.format(
            epoch + 1, train_loss / (len(train_loader.dataset) // args.batch_size)))
        t_loss.append(train_loss / (len(train_loader.dataset) // args.batch_size))

        # validation
        # prepare validation sample data
        (validation_samples, masks, onehots, text_lens) = list(enumerate(validation_loader))[0][1]
        step_back2 = validation_samples.narrow(1, 0, args.timesteps)
        masks = Variable(masks, requires_grad=False)
        masks = masks.narrow(1, 0, args.timesteps)
        x = Variable(step_back2, requires_grad=False)

        validation_samples = validation_samples.narrow(1, 1, args.timesteps)
        y = Variable(validation_samples, requires_grad=False)

        # feed the input slice x forward; the shifted slice y is used only in the loss
        outputs = model(x, (h1_init, c1_init), (h2_init, c2_init))
        end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, prev, prev2 = outputs
        loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                               log_sigma_2, rho, y, masks) / torch.sum(masks)
        validation_loss = loss.item()
        print('====> Epoch: {} Average validation loss: {:.4f}'.format(
            epoch + 1, validation_loss))
        v_loss.append(validation_loss)

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_checkpoint(epoch, model, validation_loss, optimizer,
                            args.model_dir, args.task + '_best.pt')

        # # learning rate annealing
        # if (epoch + 1) % 10 == 0:
        #     optimizer = decay_learning_rate(optimizer)

        # checkpoint model and training
        filename = args.task + '_epoch_{}.pt'.format(epoch + 1)
        save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, filename)
        print('wall time: {}s'.format(time.time() - start_time))

    f1 = plt.figure(1)
    plt.plot(range(1, args.num_epochs + 1), t_loss, color='blue', linestyle='solid')
    plt.plot(range(1, args.num_epochs + 1), v_loss, color='red', linestyle='solid')
    f1.savefig(args.task + "_loss_curves", bbox_inches='tight')
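# Small sketch (toy shapes assumed) of the input/target construction used in the
# training loop above: x = data.narrow(1, 0, T) covers steps 0..T-1 and
# y = data.narrow(1, 1, T) covers steps 1..T, so the network is supervised to predict
# the next step. The last dimension of 3 is only an assumption here (e.g. an
# end-of-stroke flag plus two pen offsets).
import torch

T = 5
data = torch.randn(2, T + 1, 3)            # (batch, timesteps + 1, features)
x = data.narrow(1, 0, T)                   # steps 0..T-1, shape (2, 5, 3)
y = data.narrow(1, 1, T)                   # steps 1..T,   shape (2, 5, 3)
assert torch.equal(y[:, :-1], x[:, 1:])    # y is x shifted one step forward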
def synthesis_train(args, train_loader, validation_loader):
    # infer padded text len and vocab len
    padded_text_len, vocab_len = train_loader.dataset[0][2].size()

    # define model and optimizer
    model = LSTMSynthesis(padded_text_len, vocab_len, args.cell_size, args.num_clusters, args.K)
    if cuda:
        model = model.cuda()
    optimizer = optim.Adam([{'params': model.parameters()}], lr=args.learning_rate)

    # initialize null hidden, memory states and cluster centers
    h1_init = c1_init = torch.zeros((args.batch_size, args.cell_size))
    h2_init = c2_init = torch.zeros((1, args.batch_size, args.cell_size))
    kappa_old = torch.zeros(args.batch_size, args.K)
    if cuda:
        h1_init, c1_init = h1_init.cuda(), c1_init.cuda()
        h2_init, c2_init = h2_init.cuda(), c2_init.cuda()
        kappa_old = kappa_old.cuda()
    h1_init, c1_init = Variable(h1_init, requires_grad=False), Variable(c1_init, requires_grad=False)
    h2_init, c2_init = Variable(h2_init, requires_grad=False), Variable(c2_init, requires_grad=False)
    kappa_old = Variable(kappa_old, requires_grad=False)

    t_loss = []
    v_loss = []
    best_validation_loss = 1E10

    # training
    start_time = time.time()
    for epoch in range(args.num_epochs):
        train_loss = 0
        for batch_idx, (data, masks, onehots, text_lens) in enumerate(train_loader):
            # gather training batch
            step_back = data.narrow(1, 0, args.timesteps)
            x = Variable(step_back, requires_grad=False)
            onehots = Variable(onehots, requires_grad=False)
            masks = Variable(masks, requires_grad=False)
            masks = masks.narrow(1, 0, args.timesteps)
            text_lens = Variable(text_lens, requires_grad=False)
            # focus window weight on first text char
            w_old = onehots.narrow(1, 0, 1).squeeze()

            optimizer.zero_grad()
            # feed forward
            outputs = model(x, onehots, text_lens, w_old, kappa_old,
                            (h1_init, c1_init), (h2_init, c2_init))
            end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, w, kappa, prev, prev2, old_phi = outputs

            data = data.narrow(1, 1, args.timesteps)
            y = Variable(data, requires_grad=False)
            loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                                   log_sigma_2, rho, y, masks) / torch.sum(masks)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

        print('====> Epoch: {} Average train loss: {:.4f}'.format(
            epoch + 1, train_loss / (len(train_loader.dataset) // args.batch_size)))
        t_loss.append(train_loss / (len(train_loader.dataset) // args.batch_size))

        # validation
        # prepare validation data
        (validation_samples, masks, onehots, text_lens) = list(enumerate(validation_loader))[0][1]
        step_back = validation_samples.narrow(1, 0, args.timesteps)
        masks = Variable(masks, requires_grad=False)
        masks = masks.narrow(1, 0, args.timesteps)
        onehots = Variable(onehots, requires_grad=False)
        text_lens = Variable(text_lens, requires_grad=False)
        w_old = onehots.narrow(1, 0, 1).squeeze()

        x = Variable(step_back, requires_grad=False)
        validation_samples = validation_samples.narrow(1, 1, args.timesteps)
        y = Variable(validation_samples, requires_grad=False)

        outputs = model(x, onehots, text_lens, w_old, kappa_old,
                        (h1_init, c1_init), (h2_init, c2_init))
        end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, w, kappa, prev, prev2, old_phi = outputs
        loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                               log_sigma_2, rho, y, masks) / torch.sum(masks)
        validation_loss = loss.item()
        print('====> Epoch: {} Average validation loss: {:.4f}'.format(
            epoch + 1, validation_loss))
        v_loss.append(validation_loss)

        # # learning rate annealing
        # if (epoch + 1) % 10 == 0:
        #     optimizer = decay_learning_rate(optimizer)

        # checkpoint model and training
        filename = args.task + '_epoch_{}.pt'.format(epoch + 1)
        save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, filename)
        print('wall time: {}s'.format(time.time() - start_time))

    f1 = plt.figure(1)
    plt.plot(range(1, args.num_epochs + 1), t_loss, color='blue', linestyle='solid')
    plt.plot(range(1, args.num_epochs + 1), v_loss, color='red', linestyle='solid')
    f1.savefig(args.task + "_loss_curves", bbox_inches='tight')
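# save_checkpoint is called by both training functions above but is not shown here.
# A plausible minimal version (hypothetical; field names are assumptions) would bundle
# the epoch, model weights, optimizer state and validation loss with torch.save.
import os
import torch

def save_checkpoint(epoch, model, validation_loss, optimizer, directory, filename):
    """Write a training checkpoint to <directory>/<filename>."""
    os.makedirs(directory, exist_ok=True)
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'validation_loss': validation_loss,
    }, os.path.join(directory, filename))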