Code example #1
0
File: vae.py  Project: wfus/namedtensor
def test(epoch):
    """Evaluate the VAE on the test set and save a reconstruction grid.

    Accumulates the per-batch loss over ``test_loader``, and for the first
    batch only, writes an image comparing originals with reconstructions
    to ``results/reconstruction_<epoch>.png``.
    """
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch_idx, (batch, _) in enumerate(test_loader):
            batch = batch.to(device)
            named = NamedTensor(batch, ("batch", "ch", "height", "width"))
            recon, normal = model(named)
            total_loss += loss_function(recon, named, normal).item()
            if batch_idx == 0:
                n = min(named.size("batch"), 8)
                # Reshape the flat reconstruction back to image dimensions
                # before stacking it under the originals.
                recon_images = recon.split(
                    x=("ch", "height", "width"), height=28, width=28
                )
                pair = [
                    named.narrow("batch", 0, n),
                    recon_images.narrow("batch", 0, n),
                ]
                comparison = ntorch.cat(pair, "batch")
                save_image(
                    comparison.values.cpu(),
                    "results/reconstruction_" + str(epoch) + ".png",
                    nrow=n,
                )

    total_loss /= len(test_loader.dataset)
    print("====> Test set loss: {:.4f}".format(total_loss))
Code example #2
0
File: torch_utils.py  Project: scape1989/aw_nas
def batchify_sentences(data, bsz, device="cuda"):
    """Arrange concatenated token sequences into ``bsz`` parallel columns.

    The flattened data is trimmed to a multiple of ``bsz`` and reshaped so
    that each column is one contiguous segment of the corpus.  Returns the
    ``(input, target)`` pair shifted by one timestep, as used for language
    modeling.
    """
    flat = torch.cat(data, -1)
    usable = (flat.size(0) // bsz) * bsz          # drop the ragged tail
    columns = flat.narrow(0, 0, usable).view(bsz, -1).t().contiguous()
    if "cuda" in device:
        columns = columns.cuda()
    # Inputs are every row but the last; targets are shifted one row down.
    return columns[:-1, :], columns[1:, :]
Code example #3
0
File: utils.py  Project: park-cheol/NLP_RNN-LSTM-GRU
def batchify(data, bsz):
    """Reshape a 1-D tensor into ``bsz`` contiguous column segments.

    Elements that would not fill a complete row are dropped, then the
    remainder is viewed as ``(bsz, -1)`` and transposed so dimension 0 is
    the sequence position and dimension 1 is the batch.
    """
    usable = (data.size(0) // bsz) * bsz
    trimmed = data.narrow(0, 0, usable)   # torch.narrow(dim, start, length)
    return trimmed.view(bsz, -1).t().contiguous()
Code example #4
0
File: yelp.py  Project: yuedongP/Point-Then-Operate
def batchify(data, bsz, args):
    """Split ``data`` into ``bsz`` parallel columns for batched training.

    Trims the tail that does not divide evenly, reshapes to
    ``(seq_len, bsz)``, and moves the result to the GPU when ``args.gpu``
    is truthy.
    """
    usable_len = (data.size(0) // bsz) * bsz
    batched = data.narrow(0, 0, usable_len).view(bsz, -1).t().contiguous()
    return batched.cuda() if args.gpu else batched
Code example #5
0
File: utils.py  Project: uhh-lt/lttc
    def __iter__(self):
        """Yield index batches arranged column-wise, batchify-style.

        The indices produced by ``self.sampler`` are laid out exactly like
        the classic language-model ``batchify``: the flat sequence is cut
        into ``self.batch_size`` contiguous segments which become the
        columns of a ``(seq_len, batch_size)`` matrix.  For the alphabet
        with batch size 4 the columns would be a-f, g-l, m-r, s-x, and the
        first yielded batch is ``[a, g, m, s]``.  Columns are treated as
        independent sequences by the model; indices that do not fill a
        complete row are dropped.
        """
        flat = torch.LongTensor(list(self.sampler))
        rows = flat.size(0) // self.batch_size
        usable = flat.narrow(0, 0, rows * self.batch_size)
        # Transpose so each yielded row holds position t of every segment.
        columns = usable.view(self.batch_size, -1).t()

        for batch_row in columns:
            yield batch_row.tolist()
Code example #6
0
def rand_write_train(args, train_loader, validation_loader):
    """Train an LSTMRandWriter for unconditional handwriting generation.

    Args:
        args: namespace providing cell_size, num_clusters, learning_rate,
            batch_size, timesteps, num_epochs, model_dir and task.
        train_loader: yields (data, masks, onehots, text_lens) batches;
            data is expected to cover at least ``timesteps + 1`` steps so
            inputs and next-step targets can both be sliced from it.
        validation_loader: same batch layout; only its first batch is used.

    Side effects: saves a per-epoch checkpoint plus a ``<task>_best.pt``
    checkpoint under ``args.model_dir`` and writes a loss-curve figure.

    Bug fixed vs. the original: the validation forward pass fed the target
    sequence ``y`` into the model while the prepared input ``x`` sat unused,
    so validation loss was computed on the wrong conditioning data.
    """
    # define model and optimizer
    model = LSTMRandWriter(args.cell_size, args.num_clusters)
    if cuda:
        model = model.cuda()

    optimizer = optim.Adam([{'params': model.parameters()}, ], lr=args.learning_rate)

    # initialize null hidden states and memory states (h1, c1, h2, c2)
    init_states = [torch.zeros((1, args.batch_size, args.cell_size))] * 4
    if cuda:
        init_states = [state.cuda() for state in init_states]
    init_states = [Variable(state, requires_grad=False) for state in init_states]
    h1_init, c1_init, h2_init, c2_init = init_states

    t_loss = []
    v_loss = []
    best_validation_loss = 1E10

    # track wall-clock training time
    start_time = time.time()

    for epoch in range(args.num_epochs):
        train_loss = 0
        for batch_idx, (data, masks, onehots, text_lens) in enumerate(train_loader):
            # inputs are steps 0..T-1; targets (below) are steps 1..T
            step_back = data.narrow(1, 0, args.timesteps)
            x = Variable(step_back, requires_grad=False)
            masks = Variable(masks, requires_grad=False)
            masks = masks.narrow(1, 0, args.timesteps)

            optimizer.zero_grad()
            # feed forward
            outputs = model(x, (h1_init, c1_init), (h2_init, c2_init))
            end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, prev, prev2 = outputs

            # supervision: next-step coordinates, masked and normalized by
            # the number of valid positions
            data = data.narrow(1, 1, args.timesteps)
            y = Variable(data, requires_grad=False)
            loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                                   log_sigma_2, rho, y, masks) / torch.sum(masks)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.item()))

        # update training performance
        print('====> Epoch: {} Average train loss: {:.4f}'.format(
            epoch + 1, train_loss / (len(train_loader.dataset) // args.batch_size)))
        t_loss.append(train_loss / (len(train_loader.dataset) // args.batch_size))

        # validation on the loader's first batch only; next(iter(...)) avoids
        # materializing the entire loader as list(enumerate(...)) did
        (validation_samples, masks, onehots, text_lens) = next(iter(validation_loader))
        step_back2 = validation_samples.narrow(1, 0, args.timesteps)
        masks = Variable(masks, requires_grad=False)
        masks = masks.narrow(1, 0, args.timesteps)

        x = Variable(step_back2, requires_grad=False)

        validation_samples = validation_samples.narrow(1, 1, args.timesteps)
        y = Variable(validation_samples, requires_grad=False)

        # BUG FIX: feed the input x (the original passed the target y here),
        # mirroring the training forward pass above.
        outputs = model(x, (h1_init, c1_init), (h2_init, c2_init))
        end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, prev, prev2 = outputs
        loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                               log_sigma_2, rho, y, masks) / torch.sum(masks)
        validation_loss = loss.item()
        print('====> Epoch: {} Average validation loss: {:.4f}'.format(
            epoch + 1, validation_loss))
        v_loss.append(validation_loss)

        # keep a separate checkpoint of the best-performing model so far
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, args.task + '_best.pt')

        # checkpoint model and training state every epoch
        filename = args.task + '_epoch_{}.pt'.format(epoch + 1)
        save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, filename)

        print('wall time: {}s'.format(time.time() - start_time))

    # plot train (blue) vs. validation (red) loss curves
    f1 = plt.figure(1)
    plt.plot(range(1, args.num_epochs + 1), t_loss, color='blue', linestyle='solid')
    plt.plot(range(1, args.num_epochs + 1), v_loss, color='red', linestyle='solid')
    f1.savefig(args.task + "_loss_curves", bbox_inches='tight')
Code example #7
0
def synthesis_train(args, train_loader, validation_loader):
    """Train an LSTMSynthesis model for text-conditioned handwriting.

    Args:
        args: namespace providing cell_size, num_clusters, K, learning_rate,
            batch_size, timesteps, num_epochs, model_dir and task.
        train_loader: yields (data, masks, onehots, text_lens) batches; the
            dataset's third field fixes (padded_text_len, vocab_len).
        validation_loader: same batch layout; only its first batch is used.

    Side effects: saves a per-epoch checkpoint plus a ``<task>_best.pt``
    checkpoint under ``args.model_dir`` and writes a loss-curve figure.

    Fixes vs. the original: ``best_validation_loss`` was initialized but
    never used, so no best-model checkpoint was ever written (the sibling
    ``rand_write_train`` does write one); the best-checkpoint save is now
    performed.  The validation batch is fetched with ``next(iter(...))``
    instead of materializing the whole loader.
    """
    # infer padded text len and vocab len from the first dataset sample
    padded_text_len, vocab_len = train_loader.dataset[0][2].size()

    # define model and optimizer
    model = LSTMSynthesis(padded_text_len, vocab_len, args.cell_size, args.num_clusters, args.K)
    if cuda:
        model = model.cuda()

    optimizer = optim.Adam([{'params': model.parameters()}, ], lr=args.learning_rate)

    # initialize null hidden, memory states and attention cluster centers
    h1_init = c1_init = torch.zeros((args.batch_size, args.cell_size))
    h2_init = c2_init = torch.zeros((1, args.batch_size, args.cell_size))
    kappa_old = torch.zeros(args.batch_size, args.K)

    if cuda:
        h1_init, c1_init = h1_init.cuda(), c1_init.cuda()
        h2_init, c2_init = h2_init.cuda(), c2_init.cuda()
        kappa_old = kappa_old.cuda()

    h1_init, c1_init = Variable(h1_init, requires_grad=False), Variable(c1_init, requires_grad=False)
    h2_init, c2_init = Variable(h2_init, requires_grad=False), Variable(c2_init, requires_grad=False)
    kappa_old = Variable(kappa_old, requires_grad=False)

    t_loss = []
    v_loss = []
    best_validation_loss = 1E10

    # training
    start_time = time.time()
    for epoch in range(args.num_epochs):
        train_loss = 0
        for batch_idx, (data, masks, onehots, text_lens) in enumerate(train_loader):
            # inputs are steps 0..T-1; targets (below) are steps 1..T
            step_back = data.narrow(1, 0, args.timesteps)
            x = Variable(step_back, requires_grad=False)
            onehots = Variable(onehots, requires_grad=False)
            masks = Variable(masks, requires_grad=False)
            masks = masks.narrow(1, 0, args.timesteps)
            text_lens = Variable(text_lens, requires_grad=False)

            # focus the attention window weight on the first text char
            w_old = onehots.narrow(1, 0, 1).squeeze()

            optimizer.zero_grad()

            # feed forward
            outputs = model(x, onehots, text_lens, w_old, kappa_old, (h1_init, c1_init), (h2_init, c2_init))
            end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, w, kappa, prev, prev2, old_phi = outputs
            data = data.narrow(1, 1, args.timesteps)
            y = Variable(data, requires_grad=False)
            loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                                   log_sigma_2, rho, y, masks) / torch.sum(masks)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.item()))

        print('====> Epoch: {} Average train loss: {:.4f}'.format(
            epoch + 1, train_loss / (len(train_loader.dataset) // args.batch_size)))
        t_loss.append(train_loss / (len(train_loader.dataset) // args.batch_size))

        # validation on the loader's first batch only; next(iter(...)) avoids
        # materializing the entire loader as list(enumerate(...)) did
        (validation_samples, masks, onehots, text_lens) = next(iter(validation_loader))
        step_back = validation_samples.narrow(1, 0, args.timesteps)
        masks = Variable(masks, requires_grad=False)
        masks = masks.narrow(1, 0, args.timesteps)
        onehots = Variable(onehots, requires_grad=False)
        text_lens = Variable(text_lens, requires_grad=False)

        w_old = onehots.narrow(1, 0, 1).squeeze()
        x = Variable(step_back, requires_grad=False)

        validation_samples = validation_samples.narrow(1, 1, args.timesteps)
        y = Variable(validation_samples, requires_grad=False)

        outputs = model(x, onehots, text_lens, w_old, kappa_old, (h1_init, c1_init), (h2_init, c2_init))
        end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, rho, w, kappa, prev, prev2, old_phi = outputs
        loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1,
                               log_sigma_2, rho, y, masks) / torch.sum(masks)
        validation_loss = loss.item()
        print('====> Epoch: {} Average validation loss: {:.4f}'.format(
            epoch + 1, validation_loss))
        v_loss.append(validation_loss)

        # FIX: actually track and checkpoint the best model, mirroring
        # rand_write_train; previously best_validation_loss was dead code.
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, args.task + '_best.pt')

        # checkpoint model and training state every epoch
        filename = args.task + '_epoch_{}.pt'.format(epoch + 1)
        save_checkpoint(epoch, model, validation_loss, optimizer, args.model_dir, filename)

        print('wall time: {}s'.format(time.time() - start_time))

    # plot train (blue) vs. validation (red) loss curves
    f1 = plt.figure(1)
    plt.plot(range(1, args.num_epochs + 1), t_loss, color='blue', linestyle='solid')
    plt.plot(range(1, args.num_epochs + 1), v_loss, color='red', linestyle='solid')
    f1.savefig(args.task + "_loss_curves", bbox_inches='tight')