Example #1
0
 def __init__(self, w_shape=None, w=None):
     """Create the weight parameter, either from ``w`` or at random.

     :param w_shape: shape handed to ``torch.randn`` when ``w`` is None
     :param w: optional object whose ``.data`` is used as the initial
         weight; takes precedence over ``w_shape`` when given
     """
     super().__init__()
     # An explicit weight wins; otherwise draw a random one of w_shape.
     initial = torch.randn(w_shape) if w is None else w.data
     self.w = torch.nn.Parameter(to_gpu(initial))
     # First entry is left as None; second is the weight's leading dimension.
     leading_dim = int(self.w.shape[0])
     self.input_shape = [None, leading_dim]
Example #2
0
 def forward(ctx, a):
     """Build a matrix that spreads each entry of ``a`` over integer bins.

     The running sum ``b = cumsum(a)`` decides the row of each column:
     column ``i`` is written at row ``floor(b[i])``. When that row differs
     from the previous column's row, the part of ``a[i]`` above the integer
     boundary goes to the new row and the remainder to the row just below.

     Side effects: stores ``a``, the per-column row indices
     (``grad_indices``) and the per-column crossing flags
     (``cross_boundary``) on ``ctx`` for use by ``backward``.

     :param a: tensor iterated along dim 0; each row is read as ``ai[0]``,
         so rows are presumably 1-element tensors -- TODO confirm with caller
     :return: the ``dim1 x dim1`` matrix, passed through ``to_gpu``
     """
     b = torch.cumsum(a, 0)
     dim1 = int(a.shape[0])
     # The matrix is allocated square; rows beyond the last bin stay zero.
     t_dim = [dim1, dim1]
     trans_mat = to_gpu(torch.zeros(t_dim))
     # Every slot is overwritten in the loop below before being read.
     grad_indices = to_gpu(torch.FloatTensor(dim1))
     prev_ind = 0
     # One flag per column, written BY INDEX. Appending to a pre-sized list
     # would leave entries 0..dim1-1 untouched, and backward indexes exactly
     # that range, so it would never see a crossing.
     cross_boundary = [False] * dim1
     for i, (ai, bi) in enumerate(zip(a, b)):
         this_ind = floor(bi)
         if this_ind == prev_ind:
             trans_mat[this_ind, i] = ai[0]
         else:  # we just crossed an integer boundary
             # Portion of the running sum that lies above the new integer.
             tmp = bi - this_ind
             trans_mat[this_ind, i] = tmp[0]
             trans_mat[this_ind - 1, i] = ai[0] - tmp[0]
             cross_boundary[i] = True
         grad_indices[i] = this_ind
         prev_ind = this_ind
     # Stored as plain attributes rather than via ctx.save_for_backward.
     ctx.a = a
     ctx.grad_indices = grad_indices
     ctx.cross_boundary = cross_boundary
     return to_gpu(trans_mat)
 def init_hidden(self, batch_size):
     """Return three independent all-zero hidden-state Variables.

     Each one has shape ``(1, batch_size, self.hidden_n)``, is passed
     through ``to_gpu`` and does not require gradients.

     :param batch_size: size of the second dimension of every state
     :return: tuple ``(h1, h2, h3)``
     """
     # NOTE: assume only 1 layer no bi-direction
     def _zero_state():
         zeros = torch.zeros(1, batch_size, self.hidden_n)
         return Variable(to_gpu(zeros), requires_grad=False)

     return _zero_state(), _zero_state(), _zero_state()
 def __init__(self,
              z_size=200,
              hidden_n=200,
              feature_len=12,
              max_seq_length=15,
              encoder_kernel_sizes=(2, 3, 4)):
     """Assemble the encoder and decoder sub-modules.

     All arguments are forwarded by keyword to ``Encoder`` and/or
     ``Decoder``; both resulting modules are passed through ``to_gpu``.

     :param z_size: forwarded to both ``Encoder`` and ``Decoder``
     :param hidden_n: forwarded to ``Decoder`` only
     :param feature_len: forwarded to both ``Encoder`` and ``Decoder``
     :param max_seq_length: forwarded to both ``Encoder`` and ``Decoder``
     :param encoder_kernel_sizes: forwarded to ``Encoder`` only
     """
     super(GrammarVariationalAutoEncoder, self).__init__()

     encoder = Encoder(
         max_seq_length=max_seq_length,
         encoder_kernel_sizes=encoder_kernel_sizes,
         z_size=z_size,
         feature_len=feature_len,
     )
     self.encoder = to_gpu(encoder)

     decoder = Decoder(
         z_size=z_size,
         hidden_n=hidden_n,
         feature_len=feature_len,
         max_seq_length=max_seq_length,
     )
     self.decoder = to_gpu(decoder)
def inputs2pytorch(inputs_):
    """Convert a dict of batch arrays into PyTorch Variables.

    Reads the keys ``'the_input'``, ``'input_length'``, ``'label_length'``
    and ``'the_labels'`` from ``inputs_``.

    :param inputs_: mapping holding the four entries above; the two length
        entries are read as ``.T[0]``, so they are presumably 2-D
        array-likes -- TODO confirm with caller
    :return: ``((inputs, probs_sizes), (labels, label_sizes))`` where
        ``inputs`` is the float input with its first two axes swapped and
        passed through ``to_gpu``, ``labels`` is every row of
        ``'the_labels'`` truncated to its label length and concatenated,
        and both size entries are int tensors
    """
    def _int_variable(values):
        # Coerce element by element so any int()-convertible item works.
        return Variable(torch.IntTensor([int(v) for v in values]))

    # reshape the data to our needs
    swapped = torch.FloatTensor(inputs_['the_input']).permute(1, 0, 2)
    inputs = Variable(to_gpu(swapped))

    raw_probs_sizes = inputs_['input_length'].T[0]
    raw_label_sizes = inputs_['label_length'].T[0]

    # Keep only the valid prefix of each label row and flatten the rows.
    flat_labels = [
        label
        for i, row in enumerate(inputs_['the_labels'])
        for label in row[:int(raw_label_sizes[i])]
    ]

    labels = _int_variable(flat_labels)
    probs_sizes = _int_variable(raw_probs_sizes)
    label_sizes = _int_variable(raw_label_sizes)
    return (inputs, probs_sizes), (labels, label_sizes)
Example #6
0
 def backward(ctx, grad_output):
     """Compute the gradient with respect to the tensor stored as ``ctx.a``.

     For each position ``k`` the gradient starts from
     ``grad_output[grad_indices[k], k]``. Then, for every later column ``j``
     whose ``ctx.cross_boundary[j]`` flag is truthy, the difference between
     ``grad_output`` at that column's row and at the row just below it is
     added.

     :param grad_output: gradient indexed as ``[row, column]``
     :return: tensor shaped like ``ctx.a``
     """
     saved_input = Variable(ctx.a)
     row_of = ctx.grad_indices
     crossed = ctx.cross_boundary
     n_cols = int(grad_output.data.shape[1])

     my_grad = to_gpu(torch.zeros_like(saved_input))
     for k, ind in enumerate(row_of):
         # Direct contribution of entry k through its own column.
         my_grad[k] = grad_output[int(ind), k]
         for j in range(k + 1, n_cols):
             if not crossed[j]:
                 continue
             row = int(row_of[j])
             delta = grad_output[row, j] - grad_output[row - 1, j]
             my_grad[k] = my_grad[k] + delta
     return my_grad
def apply_masks(x_true, x_pred, masks, ind_to_lhs_ind):
    '''
    Apply grammar transition rules to a softmax matrix
    :param x_true: Variable of actual transitions, one-hot encoded, batch x sequence x element
    :param x_pred: Variable of probabilities, past softmax, same shape as x_true
    :param masks: indexable collection of masks; entry ind_to_lhs_ind[r] is
        written into the last axis for every position whose true rule is r
    :param ind_to_lhs_ind: lookup from a rule index to an index into masks
    :return: x_pred zeroed out and rescaled
    '''

    dims = x_true.size()
    allowed = to_gpu(torch.ones(*dims))
    for b in range(dims[0]):
        for t in range(dims[1]):
            # Index of the largest entry along the last axis (the argmax).
            _, argmax = torch.max(x_true.data[b, t, :], 0)
            rule = argmax[0]
            # The true rule fixes which mask applies at this position.
            allowed[b, t, :] = masks[ind_to_lhs_ind[rule]]

    # Zero out every transition the mask forbids.
    masked = x_pred * Variable(allowed)
    # Renormalise along the last axis so it sums to 1 again.
    totals = torch.sum(masked, dim=2, keepdim=True)
    tiled_totals = torch.cat([totals] * dims[2], dim=2)
    return masked / tiled_totals
Example #8
0
import torch
from torch.optim import lr_scheduler
from gpu_utils import to_gpu
from models.simple_models import Net
from torch.utils.data import DataLoader
from data_utils.data_sources import data_gen, DatasetFromModel
from fit import fit

# Reference weights (all ones) and a random starting point of the same shape.
true_w = to_gpu(torch.ones((20, 1)))
random_w = torch.randn(true_w.shape)

# true_model is handed to the datasets below; model is the one optimised.
true_model = to_gpu(Net(true_w))
model = to_gpu(Net(random_w))

optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
# Multiply the learning rate by 0.8 every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)
criterion = torch.nn.MSELoss()
epochs = 20
save_path = 'test.mdl'

valid_dataset = DatasetFromModel(first_dim=1, batches=256, model=true_model)
train_dataset = DatasetFromModel(first_dim=1, batches=1024, model=true_model)
valid_loader = DataLoader(valid_dataset, batch_size=256)
# The training loader must wrap train_dataset; wrapping valid_dataset would
# train on the validation data and leave train_dataset unused.
train_loader = DataLoader(train_dataset, batch_size=64)

fit(train_gen=train_loader,
    valid_gen=valid_loader,
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=epochs,
Example #9
0
 def __init__(self, w_shape):
     """Create a randomly initialised weight parameter.

     :param w_shape: shape handed to ``torch.randn`` for the initial weight
     """
     super().__init__()
     initial_w = to_gpu(torch.randn(w_shape))
     self.w = torch.nn.Parameter(initial_w)
     # First entry is left as None; second is the weight's leading dimension.
     leading_dim = int(self.w.shape[0])
     self.input_shape = [None, leading_dim]
def my_ctc_loss(ext_probs, ext_labels):
    """Evaluate ``CTCLoss`` on the given pairs and divide by ``batch_size``.

    A fresh ``CTCLoss`` instance is created (and passed through ``to_gpu``)
    on every call. ``batch_size`` is read from the enclosing module scope.

    :param ext_probs: indexable whose items 0 and 1 become the first and
        third arguments of the loss call
    :param ext_labels: indexable whose items 0 and 1 become the second and
        fourth arguments of the loss call
    :return: the loss value divided by ``batch_size``
    """
    criterion = to_gpu(CTCLoss())
    probs, probs_sizes = ext_probs[0], ext_probs[1]
    labels, label_sizes = ext_labels[0], ext_labels[1]
    total = criterion(probs, labels, probs_sizes, label_sizes)
    return total / batch_size