# Assumed imports for these tests (not shown in the original snippet):
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import qelos as q   # assumption: "q" is the qelos library alias used throughout


def test_shapes(self):
    batsize = 5
    # stack two GRU cells: 9 -> 10 -> 11
    m = q.RecStack(q.GRUCell(9, 10), q.GRUCell(10, 11))
    x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
    h_tm1_a = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    h_tm1_b = Variable(torch.FloatTensor(np.random.random((batsize, 11))))
    m.set_init_states(h_tm1_a, h_tm1_b)
    y_t = m(x_t)
    # the stack's output has the top cell's hidden size
    self.assertEqual((batsize, 11), y_t.data.numpy().shape)
def test_masked_gru_bidir(self):
    batsize = 3
    seqlen = 4
    q.GRUCell.debug = False
    gru = q.GRUCell(9, 5)
    gru2 = q.GRUCell(9, 5)
    # bidirectional layer: concatenate forward and backward outputs
    layer = q.BiRNNLayer(gru, gru2, mode="cat").return_final().return_all()
    x = Variable(torch.FloatTensor(np.random.random((batsize, seqlen, 9))))
    m_val = np.asarray([[1, 1, 1, 0],
                        [1, 0, 0, 0],
                        [1, 1, 1, 1]])
    m = Variable(torch.FloatTensor(m_val))
    y_t, y = layer(x, mask=m)
    pred = y.data.numpy()
    # added sanity check: "cat" mode concatenates the two 5-unit directions into 10
    self.assertEqual((batsize, seqlen, 10), pred.shape)
def test_mask_t(self):
    batsize = 5
    gru = q.GRUCell(9, 10)
    x_t = torch.randn(batsize, 9)
    mask_t = torch.tensor([1, 1, 0, 1, 0])
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = h_tm1.clone()    # clone() instead of torch.tensor(tensor), which warns and copies
    y_t = gru(x_t, mask_t=mask_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # masked positions (mask_t == 0, i.e. examples 2 and 4) must carry the initial state through
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))
    # the cell's stored state must be held at masked positions as well
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), gru.h_tm1[4].detach().numpy()))
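# A minimal sketch of the masked update the assertions above rely on
# (illustrative, not the qelos implementation): where mask_t is 0, the
# previous state is passed through unchanged.
def _masked_step(h_new, h_tm1, mask_t):
    mask = mask_t.unsqueeze(1).float()      # (batsize,) -> (batsize, 1), broadcasts over units
    return mask * h_new + (1 - mask) * h_tm1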
def test_dropout_rec(self):
    batsize = 5
    gru = q.GRUCell(9, 10, dropout_rec=0.5)
    x_t = torch.randn(batsize, 9)
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = h_tm1.clone()
    y_t = gru(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    self.assertEqual(gru.training, True)
    # in eval mode, recurrent dropout is disabled: two runs must agree
    gru.train(False)
    self.assertEqual(gru.training, False)
    q.batch_reset(gru)
    pred1 = gru(x_t)
    q.batch_reset(gru)
    pred2 = gru(x_t)
    self.assertTrue(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
    # in training mode, freshly sampled dropout masks make the two runs differ
    gru.train(True)
    self.assertEqual(gru.training, True)
    q.batch_reset(gru)
    pred1 = gru(x_t)
    q.batch_reset(gru)
    pred2 = gru(x_t)
    self.assertFalse(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
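# The determinism checks above follow the usual recurrent-dropout convention:
# one dropout mask is sampled per batch (reset here via q.batch_reset), reused
# at every time step, and disabled entirely in eval mode. A rough sketch of
# that convention (illustrative, not the qelos implementation):
class _RecDropoutSketch(object):
    def __init__(self, p):
        self.p, self.mask = p, None

    def reset(self):                # analogous to q.batch_reset
        self.mask = None

    def __call__(self, h, training=True):
        if not training:
            return h                # eval mode: identity, hence equal predictions
        if self.mask is None:       # sample one mask, reuse it across time steps
            self.mask = torch.bernoulli(torch.full_like(h, 1 - self.p)) / (1 - self.p)
        return h * self.mask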
def test_zoneout(self):
    batsize = 5
    q.GRUCell.debug = False
    gru = q.GRUCell(9, 10, zoneout=0.5)
    x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
    h_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    gru.set_init_states(h_tm1)
    h_t = gru(x_t)
    self.assertEqual((batsize, 10), h_t.data.numpy().shape)
    self.assertEqual(gru.training, True)
    gru.train(mode=False)
    self.assertEqual(gru.training, False)
    gru.reset_state()
    pred1 = gru(x_t)
    gru.reset_state()
    pred2 = gru(x_t)
    # must be equal in prediction mode
    self.assertTrue(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
    gru.train(mode=True)
    self.assertEqual(gru.training, True)
    gru.reset_state()
    pred1 = gru(x_t)
    gru.reset_state()
    pred2 = gru(x_t)
    # must not be equal in training mode
    self.assertFalse(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
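# For reference, zoneout (Krueger et al., 2016) stochastically keeps units of
# the previous hidden state during training and becomes a deterministic
# interpolation at test time, which is exactly what the equal/unequal
# assertions above exercise. A minimal sketch (illustrative, not the qelos
# implementation):
def _zoneout_update(h_new, h_old, p=0.5, training=True):
    if training:
        keep = torch.bernoulli(torch.full_like(h_new, p))   # 1 = keep the old unit
        return keep * h_old + (1 - keep) * h_new
    return p * h_old + (1 - p) * h_new                      # deterministic at eval time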
def test_masked_gru_stack(self):
    batsize = 3
    seqlen = 4
    m = q.RecStack(q.GRUCell(9, 10), q.GRUCell(10, 11))
    x = Variable(torch.FloatTensor(np.random.random((batsize, seqlen, 9))))
    h_tm1_a = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    h_tm1_b = Variable(torch.FloatTensor(np.random.random((batsize, 11))))
    m.set_init_states(h_tm1_a, h_tm1_b)
    m = m.to_layer().return_final().return_all()
    mask_val = np.asarray([[1, 1, 1, 0],
                           [1, 0, 0, 0],
                           [1, 1, 1, 1]])
    mask = Variable(torch.FloatTensor(mask_val))
    y_t, y = m(x, mask=mask)
    # the final state must match the last output step (masked steps hold their state)
    self.assertTrue(np.allclose(y_t.data.numpy(), y.data.numpy()[:, -1]))
def test_gru_shapes(self):
    batsize = 5
    gru = q.GRUCell(9, 10)
    x_t = torch.randn(batsize, 9)
    h_tm1 = torch.randn(1, 10)
    gru.h_0 = torch.nn.Parameter(h_tm1.clone())   # clone() instead of torch.tensor(tensor)
    y_t = gru(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
def test_gru_shapes(self):
    batsize = 5
    q.GRUCell.debug = True
    gru = q.GRUCell(9, 10)
    x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
    h_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    gru.set_init_states(h_tm1)
    y_t = gru(x_t)
    self.assertEqual((batsize, 10), y_t.data.numpy().shape)
def test_shapes(self):
    batsize, seqlen, inpdim = 5, 7, 8
    vocsize, embdim, encdim = 20, 9, 10
    # generate the decoder's initial state from the last context step
    ctxtoinitff = q.Forward(inpdim, encdim)
    coreff = q.Forward(encdim, encdim)
    initstategen = q.Lambda(lambda *x, **kw: coreff(ctxtoinitff(x[1][:, -1, :])),
                            register_modules=coreff)
    decoder_cell = q.AttentionDecoderCell(
        attention=q.Attention().forward_gen(inpdim, encdim + embdim, encdim),
        embedder=nn.Embedding(vocsize, embdim),
        core=q.RecStack(
            q.GRUCell(embdim + inpdim, encdim),
            q.GRUCell(encdim, encdim),
            coreff,
        ),
        smo=q.Stack(
            q.Forward(encdim + inpdim, encdim),
            q.Forward(encdim, vocsize),
            q.Softmax(),
        ),
        init_state_gen=initstategen,
        ctx_to_decinp=True,
        ctx_to_smo=True,
        state_to_smo=True,
        decinp_to_att=True,
    )
    decoder = decoder_cell.to_decoder()
    ctx = Variable(torch.FloatTensor(np.random.random((batsize, seqlen, inpdim))))
    # mask out the last context steps (one extra step for the first two examples)
    ctxmask = np.ones((batsize, seqlen))
    ctxmask[:, -2:] = 0
    ctxmask[[0, 1], -3:] = 0
    ctxmask = Variable(torch.FloatTensor(ctxmask))
    inp = Variable(torch.LongTensor(np.random.randint(0, vocsize, (batsize, seqlen))))
    decoded = decoder(inp, ctx, ctxmask)
    self.assertEqual((batsize, seqlen, vocsize), decoded.size())
    # softmax outputs must sum to one over the vocabulary
    sums = np.sum(decoded.data.numpy(), axis=-1)
    self.assertTrue(np.allclose(sums, np.ones_like(sums)))
def test_masked_gru_reverse(self):
    batsize = 3
    seqlen = 4
    q.GRUCell.debug = False
    gru = q.GRUCell(9, 10)
    gru = gru.to_layer().return_all().return_final()
    x = Variable(torch.FloatTensor(np.random.random((batsize, seqlen, 9))))
    m_val = np.asarray([[1, 1, 1, 0],
                        [1, 0, 0, 0],
                        [1, 1, 1, 1]])
    m = Variable(torch.FloatTensor(m_val))
    y_t, y = gru(x, mask=m, reverse=True)
    pred = y.data.numpy()
    # when running in reverse, the final state corresponds to the first time step
    self.assertTrue(np.allclose(y_t.data.numpy(), pred[:, 0]))
def make_decoder(emb, lin, ctxdim=100, embdim=100, dim=100,
                 attmode="bilin", decsplit=False, **kw):
    """ Makes a decoder: an attention-cell decoder that accepts VNT. """
    ctxdim = ctxdim if not decsplit else ctxdim // 2
    coreindim = embdim + ctxdim        # because ctx_to_decinp is True, else embdim
    coretocritdim = dim if not decsplit else dim // 2
    critdim = dim + embdim             # because decinp_to_att is True, else dim
    if attmode == "bilin":
        attention = q.Attention().bilinear_gen(ctxdim, critdim)
    elif attmode == "fwd":
        attention = q.Attention().forward_gen(ctxdim, critdim)
    else:
        raise q.SumTingWongException()
    attcell = q.AttentionDecoderCell(
        attention=attention,
        embedder=emb,
        core=q.RecStack(
            q.GRUCell(coreindim, dim),
            q.GRUCell(dim, dim),
        ),
        smo=q.Stack(
            q.argsave.spec(mask={"mask"}),
            lin,
            q.argmap.spec(0, mask=["mask"]),
            q.LogSoftmax(),
            q.argmap.spec(0),
        ),
        ctx_to_decinp=True,
        ctx_to_smo=True,
        state_to_smo=True,
        decinp_to_att=True,
        state_split=decsplit,
    )
    return attcell.to_decoder()
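# Hypothetical usage sketch for make_decoder; the dimensions, the Linear input
# width, and the call signature of the returned decoder are illustrative
# assumptions (the call pattern mirrors the attention-decoder test above),
# not taken from the original code.
def _example_make_decoder_usage():
    vocsize, embdim, dim, ctxdim = 1000, 100, 100, 100
    emb = nn.Embedding(vocsize, embdim, padding_idx=0)
    # with state_to_smo and ctx_to_smo both True, the output layer is fed the
    # core state concatenated with the attention summary over the context
    lin = nn.Linear(dim + ctxdim, vocsize)
    decoder = make_decoder(emb, lin, ctxdim=ctxdim, embdim=embdim, dim=dim,
                           attmode="bilin")
    inpseqs = Variable(torch.LongTensor(np.random.randint(0, vocsize, (4, 6))))
    ctx = Variable(torch.FloatTensor(np.random.random((4, 8, ctxdim))))
    ctxmask = Variable(torch.ones(4, 8))
    return decoder(inpseqs, ctx, ctxmask)   # -> log-probabilities per time step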
def test_simple_decoder_shape(self):
    batsize, seqlen, vocsize = 5, 4, 7
    embdim, encdim, outdim = 10, 16, 10
    # model def
    decoder_cell = q.DecoderCell(
        nn.Embedding(vocsize, embdim, padding_idx=0),
        q.GRUCell(embdim, encdim),
        q.Forward(encdim, vocsize),
        q.Softmax(),
    )
    decoder = decoder_cell.to_decoder()
    # end model def
    data = Variable(torch.LongTensor(np.random.randint(0, vocsize, (batsize, seqlen))))
    decoded = decoder(data).data.numpy()
    self.assertEqual(decoded.shape, (batsize, seqlen, vocsize))     # shape check
    sums = np.sum(decoded, axis=-1)
    self.assertTrue(np.allclose(sums, np.ones_like(sums)))          # prob check
# Assumed additional imports for this script (not shown in the snippet);
# EncDec is assumed to be defined elsewhere in the same file:
from torch.utils.data import DataLoader
import seaborn as sns
import matplotlib.pyplot as plt
from IPython import embed

tt = q.ticktock("script")   # assumption: qelos' ticktock timing utility


def main(lr=0.5,
         epochs=30,
         batsize=32,
         embdim=90,
         encdim=90,
         mode="cell",       # "fast" or "cell"
         wreg=0.0001,
         cuda=False,
         gpu=1,
         ):
    if cuda:
        torch.cuda.set_device(gpu)
    usecuda = cuda
    vocsize = 50

    # create dataset tensors
    tt.tick("loading data")
    sequences = np.random.randint(0, vocsize, (batsize * 100, 16))
    # wrap in datasets (autoencoding setup: input == target)
    dataset = q.TensorDataset(sequences[:batsize * 80], sequences[:batsize * 80])
    validdataset = q.TensorDataset(sequences[batsize * 80:], sequences[batsize * 80:])
    dataloader = DataLoader(dataset=dataset, batch_size=batsize, shuffle=True)
    validdataloader = DataLoader(dataset=validdataset, batch_size=batsize, shuffle=False)
    tt.tock("data loaded")

    # model
    tt.tick("building model")
    embedder = nn.Embedding(vocsize, embdim)
    encoder = q.RecurrentStack(
        embedder,
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer().return_final(),
    )

    if mode == "fast":
        decoder = q.AttentionDecoder(
            attention=q.Attention().forward_gen(encdim, encdim, encdim),
            embedder=embedder,
            core=q.RecurrentStack(q.GRULayer(embdim, encdim)),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            return_att=True,
        )
    else:
        decoder = q.AttentionDecoderCell(
            attention=q.Attention().forward_gen(encdim, encdim + embdim, encdim),
            embedder=embedder,
            core=q.RecStack(
                q.GRUCell(embdim + encdim, encdim,
                          use_cudnn_cell=False,
                          rec_batch_norm=None,
                          activation="crelu")),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            att_after_update=False,
            ctx_to_decinp=True,
            decinp_to_att=True,
            return_att=True,
        ).to_decoder()

    m = EncDec(encoder, decoder, mode=mode)

    losses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                         q.SeqAccuracy(ignore_index=None),
                         q.SeqElemAccuracy(ignore_index=None))
    validlosses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                              q.SeqAccuracy(ignore_index=None),
                              q.SeqElemAccuracy(ignore_index=None))

    optimizer = torch.optim.Adadelta(m.parameters(), lr=lr, weight_decay=wreg)
    tt.tock("model built")

    # train with teacher forcing: feed y[:, :-1] as decoder input, predict y[:, 1:]
    q.train(m).cuda(usecuda).train_on(dataloader, losses)\
        .set_batch_transformer(lambda x, y: (x, y[:, :-1], y[:, 1:]))\
        .valid_on(validdataloader, validlosses)\
        .optimizer(optimizer).clip_grad_norm(2.)\
        .train(epochs)

    # inspect attention weights on fresh random data
    testdat = np.random.randint(0, vocsize, (batsize, 20))
    testdata = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    testdata_out = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    if mode == "cell" and False:    # disabled: optionally reverse the test input sequence
        inv_idx = torch.arange(testdata.size(1) - 1, -1, -1).long()
        testdata = testdata.index_select(1, inv_idx)
    probs, attw = m(testdata, testdata_out[:, :-1])

    def plot(x):
        sns.heatmap(x)
        plt.show()

    embed()
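# A typical entry point for a qelos-style script like this one
# (assumption: q.argprun maps CLI arguments onto main's keyword arguments):
if __name__ == "__main__":
    q.argprun(main)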