def __init__(self, w_shape=None, w=None):
    '''
    Create the module's single trainable parameter ``w``.

    :param w_shape: shape handed to ``torch.randn`` when ``w`` is not supplied
    :param w: optional object whose ``.data`` becomes the initial value of the
        parameter; when given, ``w_shape`` is ignored
    '''
    super().__init__()
    # Pick the starting values first, then wrap them exactly once.
    initial_values = torch.randn(w_shape) if w is None else w.data
    self.w = torch.nn.Parameter(to_gpu(initial_values))
    # The leading entry is left as None; the second is the first dimension of w.
    first_dim = int(self.w.shape[0])
    self.input_shape = [None, first_dim]
def forward(ctx, a):
    '''
    Build a square matrix that spreads each entry of ``a`` over the integer
    bins its running sum passes through.

    For column ``i`` the running sum ``b[i] = a[0] + ... + a[i]`` is floored to
    get a row index. If that row equals the previous column's row, the whole
    of ``a[i]`` goes into that row. Otherwise the part of ``a[i]`` that lies
    past the integer boundary goes into the new row and the remainder into the
    row just above it.

    :param ctx: autograd context; ``a``, ``grad_indices`` and
        ``cross_boundary`` are stored on it for ``backward``
    :param a: tensor iterated along dim 0; each row is indexed with ``[0]``,
        so it is presumably of shape (n, 1) -- TODO confirm with callers
    :return: ``to_gpu`` of an (n, n) matrix, with n = ``a.shape[0]``
    '''
    b = torch.cumsum(a, 0)
    dim1 = int(a.shape[0])
    # The matrix has dim1 rows and rows are addressed by floor(cumsum), so the
    # running sum must floor to a value below dim1 to stay in bounds.
    t_dim = [dim1, dim1]
    trans_mat = to_gpu(torch.zeros(t_dim))
    # Uninitialised storage; every entry is written in the loop below.
    grad_indices = to_gpu(torch.FloatTensor(dim1))

    prev_ind = 0
    # One flag per column, appended in column order so that
    # cross_boundary[j] describes column j. (Starting from a list pre-filled
    # with None and appending to it would leave indices 0..dim1-1 as None, so
    # backward would never see a True flag.)
    cross_boundary = []
    for i, (ai, bi) in enumerate(zip(a, b)):
        this_ind = floor(bi)
        if this_ind == prev_ind:
            trans_mat[this_ind, i] = ai[0]
            cross_boundary.append(False)
        else:
            # we just crossed an integer boundary
            tmp = bi - this_ind  # portion of the running sum past the boundary
            trans_mat[this_ind, i] = tmp[0]
            trans_mat[this_ind - 1, i] = ai[0] - tmp[0]
            cross_boundary.append(True)
        grad_indices[i] = this_ind
        prev_ind = this_ind

    ctx.a = a
    ctx.grad_indices = grad_indices
    ctx.cross_boundary = cross_boundary
    return to_gpu(trans_mat)
def init_hidden(self, batch_size):
    '''
    Create three independent all-zero hidden states.

    :param batch_size: size of the middle dimension of each state
    :return: tuple ``(h1, h2, h3)`` of Variables of shape
        (1, batch_size, self.hidden_n), none of which require gradients
    '''
    # NOTE: assume only 1 layer no bi-direction
    state_shape = (1, batch_size, self.hidden_n)
    h1, h2, h3 = (
        Variable(to_gpu(torch.zeros(*state_shape)), requires_grad=False)
        for _ in range(3)
    )
    return h1, h2, h3
def __init__(self, z_size=200, hidden_n=200, feature_len=12,
             max_seq_length=15, encoder_kernel_sizes=(2, 3, 4)):
    '''
    Assemble the autoencoder from an ``Encoder`` and a ``Decoder``.

    :param z_size: forwarded to both the encoder and the decoder
    :param hidden_n: forwarded to the decoder only
    :param feature_len: forwarded to both the encoder and the decoder
    :param max_seq_length: forwarded to both the encoder and the decoder
    :param encoder_kernel_sizes: forwarded to the encoder only
    '''
    super(GrammarVariationalAutoEncoder, self).__init__()

    # The encoder is built and registered before the decoder is created.
    encoder = Encoder(
        max_seq_length=max_seq_length,
        encoder_kernel_sizes=encoder_kernel_sizes,
        z_size=z_size,
        feature_len=feature_len,
    )
    self.encoder = to_gpu(encoder)

    decoder = Decoder(
        z_size=z_size,
        hidden_n=hidden_n,
        feature_len=feature_len,
        max_seq_length=max_seq_length,
    )
    self.decoder = to_gpu(decoder)
def inputs2pytorch(inputs_):
    '''
    Convert a dict of batch arrays into the Variables used downstream.

    :param inputs_: mapping with keys ``'the_input'``, ``'input_length'``,
        ``'label_length'`` and ``'the_labels'``; the two length entries must
        support ``.T`` (presumably numpy arrays -- TODO confirm)
    :return: ``((inputs, probs_sizes), (labels, label_sizes))`` where
        ``inputs`` is the float input with its first two axes swapped and the
        other three are IntTensor Variables; ``labels`` is the concatenation
        of every label row cut to its own length
    '''
    def as_int_variable(values):
        # Every size/label entry is coerced through int() before packing.
        return Variable(torch.IntTensor([int(v) for v in values]))

    # reshape the data to our needs
    swapped = torch.FloatTensor(inputs_['the_input']).permute(1, 0, 2)
    inputs = Variable(to_gpu(swapped))

    # First row of the transposed length arrays.
    probs_sizes = inputs_['input_length'].T[0]
    label_sizes = inputs_['label_length'].T[0]

    # Keep only the first label_sizes[idx] entries of each label row.
    flat_labels = []
    for idx, row in enumerate(inputs_['the_labels']):
        flat_labels.extend(row[:int(label_sizes[idx])])

    labels = as_int_variable(flat_labels)
    probs_sizes = as_int_variable(probs_sizes)
    label_sizes = as_int_variable(label_sizes)
    return (inputs, probs_sizes), (labels, label_sizes)
def backward(ctx, grad_output):
    '''
    Compute the gradient with respect to the input saved on ``ctx``.

    Entry ``k`` of the result starts as ``grad_output[row_k, k]``, where
    ``row_k`` is ``ctx.grad_indices[k]``. For every later column ``j`` whose
    ``ctx.cross_boundary[j]`` flag is truthy, the difference between
    ``grad_output`` at that column's row and at the row above it is added.

    :param ctx: context carrying ``a``, ``grad_indices`` and ``cross_boundary``
    :param grad_output: 2-D gradient indexed as ``[row, column]``
    :return: gradient shaped like ``ctx.a``
    '''
    a = Variable(ctx.a)
    row_of = ctx.grad_indices
    grad_a = to_gpu(torch.zeros_like(a))

    for col, row in enumerate(row_of):
        # Direct contribution of this column.
        grad_a[col] = grad_output[int(row), col]

        # Contributions from later columns flagged in ctx.cross_boundary.
        for later in range(col + 1, int(grad_output.data.shape[1])):
            if not ctx.cross_boundary[later]:
                continue
            later_row = int(row_of[later])
            delta = grad_output[later_row, later] - grad_output[later_row - 1, later]
            grad_a[col] = grad_a[col] + delta

    return grad_a
def apply_masks(x_true, x_pred, masks, ind_to_lhs_ind):
    '''
    Restrict predicted probabilities to the rules allowed by the true sequence.

    :param x_true: Variable of actual transitions, one-hot encoded,
        batch x sequence x element
    :param x_pred: Variable of probabilities (after softmax), same shape as
        x_true
    :param masks: lookup of mask rows, indexed by left-hand-side index
    :param ind_to_lhs_ind: lookup from a rule index to its left-hand-side index
    :return: x_pred multiplied by the selected masks and divided by its sum
        over the last axis
    '''
    x_size = x_true.size()
    allowed = to_gpu(torch.ones(*x_size))

    for b in range(x_size[0]):
        for t in range(x_size[1]):
            # position of the maximum along the last axis, i.e. the true rule
            _, argmax = torch.max(x_true.data[b, t, :], 0)
            rule = argmax[0]
            # the mask row is chosen by the left-hand side of the true rule
            allowed[b, t, :] = masks[ind_to_lhs_ind[rule]]

    # zero out every transition the true sequence does not permit
    masked = x_pred * Variable(allowed)

    # renormalise so the last axis sums to 1 again
    totals = torch.sum(masked, dim=2, keepdim=True)
    tiled_totals = torch.cat([totals] * x_size[2], dim=2)
    out = masked / tiled_totals
    return out
import torch from torch.optim import lr_scheduler from gpu_utils import to_gpu from models.simple_models import Net from torch.utils.data import DataLoader from data_utils.data_sources import data_gen, DatasetFromModel from fit import fit true_w = to_gpu(torch.ones((20, 1))) random_w = torch.randn(true_w.shape) true_model = to_gpu(Net(true_w)) model = to_gpu(Net(random_w)) optimizer = torch.optim.SGD(model.parameters(), lr=0.05) scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8) criterion = torch.nn.MSELoss() epochs = 20 save_path = 'test.mdl' valid_dataset = DatasetFromModel(first_dim=1, batches=256, model=true_model) train_dataset = DatasetFromModel(first_dim=1, batches=1024, model=true_model) valid_loader = DataLoader(valid_dataset, batch_size=256) train_loader = DataLoader(valid_dataset, batch_size=64) fit(train_gen=train_loader, valid_gen=valid_loader, model=model, optimizer=optimizer, scheduler=scheduler, epochs=epochs,
def __init__(self, w_shape):
    '''
    Create the module's single trainable parameter ``w`` from random values.

    :param w_shape: shape handed to ``torch.randn`` for the initial values
    '''
    super().__init__()
    random_init = to_gpu(torch.randn(w_shape))
    self.w = torch.nn.Parameter(random_init)
    # The leading entry is left as None; the second is the first dimension of w.
    first_dim = int(self.w.shape[0])
    self.input_shape = [None, first_dim]
def my_ctc_loss(ext_probs, ext_labels):
    '''
    Evaluate ``CTCLoss`` and divide the result by the module-level
    ``batch_size``.

    :param ext_probs: indexable whose items 0 and 1 are passed as the first
        and third arguments of the loss (presumably activations and their
        lengths -- TODO confirm)
    :param ext_labels: indexable whose items 0 and 1 are passed as the second
        and fourth arguments of the loss (presumably labels and their
        lengths -- TODO confirm)
    :return: the loss value divided by ``batch_size``
    '''
    criterion = to_gpu(CTCLoss())
    total = criterion(ext_probs[0], ext_labels[0], ext_probs[1], ext_labels[1])
    # batch_size is read from the enclosing module, not passed in.
    return total / batch_size