def test_mask(self):
    # Masked timesteps must yield zero outputs, and the returned final
    # state must equal the output at each example's last unmasked step.
    batsize, seqlen, indim = 5, 3, 4
    outdim = 6
    layer = q.GRULayer(indim, outdim).return_final()
    x = Variable(
        torch.FloatTensor(np.random.random((batsize, seqlen, indim))))
    seqmask = Variable(
        torch.LongTensor([
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1],
            [1, 0, 0],
        ]))
    final, pred = layer(x, mask=seqmask)
    print(pred)
    # self.assertFalse(True)
    self.assertEqual((batsize, seqlen, outdim), pred.size())
    self.assertEqual((batsize, outdim), layer.get_states(0)[0].size())
    pred, final = pred.data.numpy(), final.data.numpy()
    # masked-out positions are zeroed
    self.assertTrue(
        np.allclose(pred[0, 2, :], np.zeros_like(pred[0, 2, :])))
    self.assertTrue(
        np.allclose(pred[4, 1:, :], np.zeros_like(pred[4, 1:, :])))
    # final state == output at the last valid position of each example
    for example, last_step in ((0, 1), (4, 0), (3, 2)):
        self.assertTrue(
            np.allclose(final[example, :], pred[example, last_step, :]))
def test_shapes(self):
    # Unmasked GRULayer: output is (batch, seq, hidden), stored state
    # is (batch, hidden).
    nbatch, nsteps, nfeats = 5, 3, 4
    rnn = q.GRULayer(nfeats, 6)
    inp = Variable(
        torch.FloatTensor(np.random.random((nbatch, nsteps, nfeats))))
    out = rnn(inp)
    print(out)
    self.assertEqual((nbatch, nsteps, 6), out.size())
    self.assertEqual((nbatch, 6), rnn.get_states(0)[0].size())
def test_shapes(self):
    """AttentionDecoder yields (batch, seq, vocab) rows that each sum to 1."""
    batsize, seqlen, inpdim = 5, 7, 8
    vocsize, embdim, encdim = 20, 9, 10
    coreff = q.Forward(encdim, encdim)
    # NOTE(review): the original also built an init-state generator
    # (a q.Lambda over a ctx->init q.Forward) that was never passed to
    # the decoder — dead code, removed. If an initial-state hook was
    # intended, wire it into q.AttentionDecoder explicitly.
    decoder = q.AttentionDecoder(
        attention=q.Attention().forward_gen(inpdim, encdim + embdim, encdim),
        embedder=nn.Embedding(vocsize, embdim),
        core=q.RecurrentStack(
            q.GRULayer(embdim, encdim),
            q.GRULayer(encdim, encdim),
            coreff
        ),
        smo=q.Stack(
            q.Forward(encdim + inpdim, encdim),
            q.Forward(encdim, vocsize),
            q.Softmax(),
            q.argmap.spec(0),
        ),
        ctx_to_smo=True,
        state_to_smo=True,
        decinp_to_att=True
    )
    # random context with a mask hiding the trailing steps
    ctx = np.random.random((batsize, seqlen, inpdim))
    ctx = Variable(torch.FloatTensor(ctx))
    ctxmask = np.ones((batsize, seqlen))
    ctxmask[:, -2:] = 0
    ctxmask[[0, 1], -3:] = 0
    ctxmask = Variable(torch.FloatTensor(ctxmask))
    inp = np.random.randint(0, vocsize, (batsize, seqlen))
    inp = Variable(torch.LongTensor(inp))
    decoded = decoder(inp, ctx, ctxmask)
    self.assertEqual((batsize, seqlen, vocsize), decoded.size())
    # softmax output: each per-step distribution sums to one
    sums = np.sum(decoded.data.numpy(), axis=-1)
    self.assertTrue(np.allclose(sums, np.ones_like(sums)))
    print(decoded.size())
def test_shapes(self):
    # Embedding -> GRU -> Forward -> LogSoftmax stack preserves
    # (batch, seq) and maps features to vocab size.
    nbatch, nsteps, nvoc, dembed, denc = 5, 3, 20, 4, 6
    stack = q.RecurrentStack(
        nn.Embedding(nvoc, dembed),
        q.GRULayer(dembed, denc),
        q.Forward(denc, nvoc),
        q.LogSoftmax())
    token_ids = Variable(
        torch.LongTensor(np.random.randint(0, nvoc, (nbatch, nsteps))))
    out = stack(token_ids)
    print(out)
    self.assertEqual((nbatch, nsteps, nvoc), out.size())
    self.assertEqual((nbatch, denc), stack.get_states(0)[0].size())
def __init__(self, input_size, hidden_size, num_layers, num_classes, mode="qrnn"):
    """Recurrent classifier backbone: stacked RNN followed by a linear head.

    BUG FIX: ``mode`` was read from an undefined name (a NameError at
    construction time unless a module-level ``mode`` global happened to
    exist); it is now an explicit keyword argument. Default "qrnn" matches
    the first branch — TODO confirm against existing callers.

    :param input_size:  feature size of each timestep
    :param hidden_size: recurrent state size
    :param num_layers:  number of stacked recurrent layers
    :param num_classes: output size of the final linear layer
    :param mode:        "qrnn" (q cells), "nn" (torch nn.GRU) or "stack"
    """
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    if mode == "qrnn":
        tt.msg("using q.RNN")
        # first cell applies batch norm on the recurrent ("main") path
        self.rnn = RecStack(*[GRUCell(input_size, hidden_size, use_cudnn_cell=False, rec_batch_norm="main")]
                             + [GRUCell(hidden_size, hidden_size) for i in range(num_layers - 1)])\
            .to_layer().return_all()
    elif mode == "nn":
        tt.msg("using nn.RNN")
        self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
    elif mode == "stack":
        self.rnn = q.RecurrentStack(
            *([q.GRULayer(input_size, hidden_size)] +
              [q.GRULayer(hidden_size, hidden_size) for i in range(num_layers - 1)]))
    else:
        # previously fell through silently, leaving self.rnn unset
        raise ValueError("unknown mode: {}".format(mode))
    self.fc = nn.Linear(hidden_size, num_classes)
def test_masked_3D_data(self):
    # linout over 3D data: output must match a manual matmul against the
    # GRU-encoded data, with the mask applied elementwise.
    self.linout.data = q.val(
        np.random.random((7, 10, 3)).astype(dtype="float32")).v
    self.linout.computer = q.GRULayer(3, 15).return_final("only")
    x = Variable(torch.randn(3, 15)).float()
    # scatter ones at a handful of (batch, position) pairs
    msk = np.zeros((3, 7)).astype("int32")
    for row, col in zip([0, 0, 0, 1, 1, 2], [0, 2, 3, 2, 6, 5]):
        msk[row, col] = 1
    print(msk)
    msk = Variable(torch.from_numpy(msk))
    out = self.linout(x, mask=msk)
    self.assertEqual(out.size(), (3, 7))
    # reference: encode the data, dot with x, then zero out masked cells
    reference = torch.matmul(x, self.linout.computer(self.linout.data).t())
    reference = reference * msk.float()
    self.assertTrue(np.allclose(reference.data.numpy(), out.data.numpy()))
def test_fast_context_decoder_shape(self):
    # ContextDecoder concatenates the context vector to the embedding at
    # every step; check output shape and that softmax rows sum to 1.
    nbatch, nsteps, nvoc = 5, 4, 7
    dembed, denc, dout, dctx = 10, 16, 10, 8
    # model def
    decoder = q.ContextDecoder(
        nn.Embedding(nvoc, dembed, padding_idx=0),
        q.RecurrentStack(
            q.GRULayer(dembed + dctx, denc),
            q.Forward(denc, nvoc),
            q.Softmax()
        )
    )
    # end model def
    tokens = Variable(
        torch.LongTensor(np.random.randint(0, nvoc, (nbatch, nsteps))))
    context = Variable(
        torch.FloatTensor(np.random.random((nbatch, dctx))))
    probs = decoder(tokens, context).data.numpy()
    self.assertEqual(probs.shape, (nbatch, nsteps, nvoc))  # shape check
    rowsums = np.sum(probs, axis=-1)
    self.assertTrue(np.allclose(rowsums, np.ones_like(rowsums)))  # prob check
def make_encoder(src_emb, embdim=100, dim=100, **kw):
    """ make encoder
    # concatenating bypass encoder:
    #   embedding --> top GRU
    #             --> 1st BiGRU
    #   1st BiGRU --> top GRU
    #             --> 2nd BiGRU
    #   2nd BiGRU --> top GRU
    """
    encoder = q.RecurrentStack(
        src_emb,                          # embs, masks
        q.argsave.spec(emb=0, mask=1),    # stash embeddings and mask for later
        q.argmap.spec(0, mask=["mask"]),
        q.BidirGRULayer(embdim, dim),     # 1st BiGRU -> features of size dim*2
        q.argsave.spec(bypass=0),         # stash 1st BiGRU states for the bypass
        q.argmap.spec(0, mask=["mask"]),
        q.BidirGRULayer(dim * 2, dim),    # 2nd BiGRU -> features of size dim*2
        q.argmap.spec(0, ["bypass"], ["emb"]),
        # BUG FIX: concatenate along the FEATURE axis (dim 2), not the time
        # axis (dim 1). The next GRULayer declares input size
        # dim * 4 + embdim = (dim*2) + (dim*2) + embdim, which only holds
        # for a feature-axis concat of (batch, seq, feat) tensors.
        q.Lambda(lambda x, y, z: torch.cat([x, y, z], 2)),
        q.argmap.spec(0, mask=["mask"]),
        q.GRULayer(dim * 4 + embdim, dim).return_final(True),
        q.argmap.spec(1, ["mask"], 0),
    )   # returns (all_states, mask, final_state)
    return encoder
def main(
        lr=0.5,
        epochs=30,
        batsize=32,
        embdim=90,
        encdim=90,
        mode="cell",        # "fast" or "cell"
        wreg=0.0001,
        cuda=False,
        gpu=1,
    ):
    """Train an SRU encoder + attention decoder on a random-sequence copy task.

    `mode` selects the decoder implementation: "fast" uses the whole-sequence
    q.AttentionDecoder, anything else uses the per-step q.AttentionDecoderCell.
    After training, runs one decode on fresh random data and drops into an
    interactive shell (embed()) for inspection.
    """
    if cuda:
        torch.cuda.set_device(gpu)
    usecuda = cuda
    vocsize = 50
    # create datasets tensor
    tt.tick("loading data")
    # copy task: input and target are the same random id sequences
    sequences = np.random.randint(0, vocsize, (batsize * 100, 16))
    # wrap in dataset — first 80 batches train, remaining 20 validate
    dataset = q.TensorDataset(sequences[:batsize * 80], sequences[:batsize * 80])
    validdataset = q.TensorDataset(sequences[batsize * 80:], sequences[batsize * 80:])
    dataloader = DataLoader(dataset=dataset, batch_size=batsize, shuffle=True)
    validdataloader = DataLoader(dataset=validdataset, batch_size=batsize, shuffle=False)
    tt.tock("data loaded")
    # model
    tt.tick("building model")
    # the same embedder is shared by encoder and decoder
    embedder = nn.Embedding(vocsize, embdim)
    encoder = q.RecurrentStack(
        embedder,
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer().return_final(),
    )
    if mode == "fast":
        # sequence-level attention decoder
        decoder = q.AttentionDecoder(
            attention=q.Attention().forward_gen(encdim, encdim, encdim),
            embedder=embedder,
            core=q.RecurrentStack(q.GRULayer(embdim, encdim)),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            return_att=True)
    else:
        # step-level (cell) attention decoder; context is fed back into
        # the decoder input (ctx_to_decinp) and into attention (decinp_to_att)
        decoder = q.AttentionDecoderCell(
            attention=q.Attention().forward_gen(encdim, encdim + embdim, encdim),
            embedder=embedder,
            core=q.RecStack(
                q.GRUCell(embdim + encdim, encdim, use_cudnn_cell=False, rec_batch_norm=None, activation="crelu")),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            att_after_update=False,
            ctx_to_decinp=True,
            decinp_to_att=True,
            return_att=True,
        ).to_decoder()
    m = EncDec(encoder, decoder, mode=mode)
    losses = q.lossarray(q.SeqNLLLoss(ignore_index=None), q.SeqAccuracy(ignore_index=None), q.SeqElemAccuracy(ignore_index=None))
    validlosses = q.lossarray(q.SeqNLLLoss(ignore_index=None), q.SeqAccuracy(ignore_index=None), q.SeqElemAccuracy(ignore_index=None))
    optimizer = torch.optim.Adadelta(m.parameters(), lr=lr, weight_decay=wreg)
    tt.tock("model built")
    # teacher forcing: feed target[:-1], predict target[1:]
    q.train(m).cuda(usecuda).train_on(dataloader, losses)\
        .set_batch_transformer(lambda x, y: (x, y[:, :-1], y[:, 1:]))\
        .valid_on(validdataloader, validlosses)\
        .optimizer(optimizer).clip_grad_norm(2.)\
        .train(epochs)
    # decode a fresh random batch for inspection
    testdat = np.random.randint(0, vocsize, (batsize, 20))
    testdata = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    testdata_out = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    # deliberately disabled (`and False`): would reverse the input sequence
    # for the cell decoder — kept for experimentation
    if mode == "cell" and False:
        inv_idx = torch.arange(testdata.size(1) - 1, -1, -1).long()
        testdata = testdata.index_select(1, inv_idx)
    probs, attw = m(testdata, testdata_out[:, :-1])

    def plot(x):
        # helper available in the interactive session: heatmap of e.g. attw
        sns.heatmap(x)
        plt.show()
    embed()
def main(
        # Hyper Parameters
        sequence_length=28,
        input_size=28,
        hidden_size=128,
        num_layers=2,
        batch_size=5,
        num_epochs=2,
        learning_rate=0.01,
        ctx_to_decinp=False,
        gpu=False,
        mode="stack",       # "nn" or "qrnn" or "stack"
        trivial=False,
    ):
    """Train an image->character-sequence model on MNIST.

    The encoder reads each image as a sequence of rows; the decoder emits the
    digit label spelled out as a character sequence (via number2charseq).
    With trivial=True the encoder is bypassed and the label index itself is
    fed to the decoder (sanity-check mode). Evaluates exact-match sequence
    accuracy on the test set, then saves the model weights.
    """
    tt.msg("using q: {}".format(mode))
    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/', train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = dsets.MNIST(root='../../../datasets/mnist/', train=False, transform=transforms.ToTensor())
    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
    if gpu:
        q.var.all_cuda = True
    encoder = Encoder(input_size, hidden_size, num_layers, mode=mode)
    embdim = hidden_size
    decdim = 100
    # maps the encoder context to the decoder's initial hidden state
    initstate = nn.Linear(hidden_size, decdim)
    # 256-way output: decoder predicts raw byte/char ids
    decoder = q.ContextDecoder(*[
        nn.Embedding(256, embdim),
        q.RecurrentStack(
            q.GRULayer((embdim + hidden_size if ctx_to_decinp else embdim), decdim),
            nn.Linear(decdim, 256),
            nn.LogSoftmax())
    ], ctx_to_h0=initstate, ctx_to_decinp=ctx_to_decinp)
    if trivial:
        encdec = IdxToSeq(decoder, embdim=hidden_size)
    else:
        encdec = ImgToSeq(encoder, decoder)
    if gpu:
        encdec.cuda()
    # Loss and Optimizer
    # BUG FIX: removed a dead `criterion = nn.CrossEntropyLoss()` that was
    # immediately overwritten by the sequence loss below.
    criterion = q.SeqNLLLoss(ignore_index=0)
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adadelta(encdec.parameters(), lr=learning_rate)
    tt.msg("training")
    # Train the Model
    for epoch in range(num_epochs):
        tt.tick()
        btt = ticktock("batch")
        btt.tick()
        for i, (images, labels) in enumerate(train_loader):
            #btt.tick("doing batch")
            # each image row becomes one timestep of the input sequence
            images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
            labels = q.var(labels).cuda(crit=gpu).v
            tgt = number2charseq(labels)
            if trivial:
                images = labels
            # Forward + Backward + Optimize (teacher forcing: in=tgt[:-1], out=tgt[1:])
            optimizer.zero_grad()
            outputs = encdec(images, tgt[:, :-1])
            loss = criterion(outputs, tgt[:, 1:])
            loss.backward()
            optimizer.step()
            if (i + 1) % 100 == 0:
                btt.tock("100 batches done")
                # total gradient norm across all parameters, for monitoring
                tgn = 0
                for param in encdec.parameters():
                    tgn = tgn + torch.norm(param.grad, 2)
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, TGN: %.8f'
                      % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.data[0], tgn.cpu().data.numpy()[0]))
                btt.tick()
            #tt.tock("batch done")
        tt.tock("epoch {} done {}".format(epoch, loss.data[0]))
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        if trivial:
            images = labels
        tgt = number2charseq(labels)
        outputs = encdec(images, tgt[:, :-1])
        _, predicted = torch.max(outputs.data, 2)
        if tgt.is_cuda:
            tgt = tgt.cpu()
        if predicted.is_cuda:
            predicted = predicted.cpu()
        tgt = tgt[:, 1:].data.numpy()
        predicted = predicted.numpy()
        # print(predicted[:10])
        # print(tgt[:10])
        # print(labels[:10])
        # exact-match accuracy; padding positions (tgt == 0) always count as equal
        tgtmask = tgt == 0
        eq = predicted == tgt
        eq = eq | tgtmask
        eq = np.all(eq, axis=1)
        correct += eq.sum()
        total += labels.size(0)
    print('Test Accuracy of the model on the 10000 test images: %d %%' % (100. * correct / total))
    # Save the Model
    torch.save(encdec.state_dict(), 'rnn.pkl')