# shared imports for the snippets below
import numpy as np
import torch
import qelos as q


def test_mask_t(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    mask_t = torch.tensor([1, 1, 0, 1, 0])
    c_tm1 = torch.randn(1, 10)
    h_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(h_tm1).v
    y_t = lstm(x_t, mask_t=mask_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # rows 2 and 4 are masked out: the cell must return the previous output there
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))
    # the stored recurrent state must also carry the old values for masked rows
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[2].detach().numpy()))
    self.assertFalse(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[1].detach().numpy()))
    self.assertTrue(np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[4].detach().numpy()))
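
# For reference: the carry behaviour asserted above can be written down in a
# few lines of plain torch. This is a sketch of the semantics, not the qelos
# implementation; `h_new` stands in for the cell's unmasked output.
def masked_step_reference(h_new, h_prev, mask_t):
    mask = mask_t.float().unsqueeze(1)          # (batsize,) -> (batsize, 1)
    return mask * h_new + (1 - mask) * h_prev   # mask-0 rows keep h_prev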
def test_zoneout(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10, zoneout=0.5)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(y_tm1).v
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    self.assertEqual(lstm.training, True)
    # in eval mode zoneout must be deterministic: two runs from the same state agree
    lstm.train(False)
    self.assertEqual(lstm.training, False)
    lstm.rec_reset()
    pred1 = lstm(x_t)
    lstm.rec_reset()
    pred2 = lstm(x_t)
    self.assertTrue(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
    # in train mode zoneout samples a random carry mask: two runs should differ
    lstm.train(True)
    self.assertEqual(lstm.training, True)
    lstm.rec_reset()
    pred1 = lstm(x_t)
    lstm.rec_reset()
    pred2 = lstm(x_t)
    self.assertFalse(np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
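
# Zoneout randomly keeps units of the previous hidden state during training
# and uses the expected update at eval time, which is why the two eval-mode
# runs above agree while the two train-mode runs differ. A plain-torch sketch
# of the rule (not the qelos source):
def zoneout_reference(h_new, h_prev, p, training):
    if training:
        keep_prev = (torch.rand_like(h_new) < p).float()
        return keep_prev * h_prev + (1.0 - keep_prev) * h_new
    return p * h_prev + (1.0 - p) * h_new       # expected value at eval time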
def __init__(self, D, embdim, zdim, startsym, *innerdim, **kw):
    super(Decoder, self).__init__()
    self.emb = q.WordEmb(embdim, worddic=D)
    # the first layer consumes the embedding concatenated with the latent z
    innerdim = (embdim + zdim,) + innerdim
    self.layers = torch.nn.ModuleList(modules=[
        q.LSTMCell(innerdim[i - 1], innerdim[i])
        for i in range(1, len(innerdim))
    ])
    self.linout = q.WordLinout(innerdim[-1], worddic=D)
    self.sm = torch.nn.Softmax(-1)
    self.maxtime = q.getkw(kw, "maxtime", 100)
    self.startid = D[startsym]
    self.sm_sample = True
    self.zdim = zdim
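
# How the pieces set up above presumably fit together at each free-running
# step: embed the previous token, concatenate the latent z, run the cell
# stack, project to the vocabulary, then sample or argmax. A condensed,
# hypothetical single-step sketch, not the Decoder's actual forward:
def decode_step_reference(dec, y_tm1_ids, z):
    emb, _ = dec.emb(y_tm1_ids)                 # (batsize, embdim)
    h = torch.cat([emb, z], 1)                  # (batsize, embdim + zdim)
    for layer in dec.layers:
        h = layer(h)
    probs = dec.sm(dec.linout(h))               # (batsize, vocsize)
    if dec.sm_sample:
        return torch.multinomial(probs, 1).squeeze(1)
    return probs.argmax(-1)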
def test_lstm_shapes(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = q.val(c_tm1).v
    lstm.y_0 = q.val(y_tm1).v
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # after one step the stored recurrent state equals the returned output
    self.assertTrue(np.allclose(lstm.y_tm1.detach().numpy(), y_t.detach().numpy()))
    q.rec_reset(lstm)
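
# Typical usage implied by this test: the cell is stepped manually over time,
# and rec_reset clears y_tm1/c_tm1 between sequences. A small helper sketch
# built only on the q.LSTMCell/q.rec_reset calls seen in these tests:
def roll_out(cell, x):                          # x: (batsize, seqlen, indim)
    q.rec_reset(cell)                           # start from the initial state
    return torch.stack([cell(x[:, t]) for t in range(x.size(1))], 1)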
def test_it(self):
    x = np.random.randint(0, 100, (1000, 7))
    y_inp = x[:, :-1]
    y_out = x[:, 1:]
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att, None, decoder_out)
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    # assertTrue(a, b) would treat b as a message and always pass; compare instead
    self.assertEqual(y.size(), (1000, 7, 100))
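
# Conceptually, q.TFDecoder runs the cell once per target position while
# feeding in the *gold* previous token (teacher forcing), so training needs
# no sampling loop. A reference sketch assuming a cell whose context has
# already been bound (not the qelos source):
def tf_decode_reference(cell, gold):            # gold: (batsize, seqlen) ids
    outs = [cell(gold[:, t]) for t in range(gold.size(1))]
    return torch.stack(outs, 1)                 # (batsize, seqlen, vocsize)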
def run(lr=0.001,
        dropout=0.2,
        batsize=50,
        embdim=50,
        encdim=50,
        decdim=50,
        numlayers=1,
        bidir=False,
        which="geo",    # "geo", "atis", "jobs"
        test=True,
        ):
    settings = locals().copy()
    logger = q.log.Logger(prefix="seq2seq_base")
    logger.save_settings(**settings)

    # region data
    nlsm, qlsm, splitidxs = load_data(which=which)
    print(nlsm[0], qlsm[0])
    print(nlsm._rarewords)
    trainloader = q.dataload(nlsm.matrix[:splitidxs[0]], qlsm.matrix[:splitidxs[0]],
                             batch_size=batsize, shuffle=True)
    devloader = q.dataload(nlsm.matrix[splitidxs[0]:splitidxs[1]], qlsm.matrix[splitidxs[0]:splitidxs[1]],
                           batch_size=batsize, shuffle=False)
    testloader = q.dataload(nlsm.matrix[splitidxs[1]:], qlsm.matrix[splitidxs[1]:],
                            batch_size=batsize, shuffle=False)
    # endregion

    # region model
    encdims = [encdim] * numlayers
    outdim = (encdim if not bidir else encdim * 2) + decdim
    nlemb = q.WordEmb(embdim, worddic=nlsm.D)
    qlemb = q.WordEmb(embdim, worddic=qlsm.D)
    nlenc = q.LSTMEncoder(embdim, *encdims, bidir=bidir, dropout_in=dropout)
    att = q.att.DotAtt()
    if numlayers > 1:
        qldec_core = torch.nn.Sequential(
            *[q.LSTMCell(_indim, _outdim, dropout_in=dropout)
              for _indim, _outdim in [(embdim, decdim)] + [(decdim, decdim)] * (numlayers - 1)]
        )
    else:
        qldec_core = q.LSTMCell(embdim, decdim, dropout_in=dropout)
    qlout = q.WordLinout(outdim, worddic=qlsm.D)
    qldec = q.LuongCell(emb=qlemb, core=qldec_core, att=att, out=qlout)

    class Model(torch.nn.Module):
        def __init__(self, _nlemb, _nlenc, _qldec, train=True, **kw):
            super(Model, self).__init__(**kw)
            self.nlemb, self.nlenc, self._q_train = _nlemb, _nlenc, train
            if train:
                self.qldec = q.TFDecoder(_qldec)
            else:
                self.qldec = q.FreeDecoder(_qldec, maxtime=100)

        def forward(self, x, y):    # (batsize, seqlen) int ids
            xemb, xmask = self.nlemb(x)
            xenc = self.nlenc(xemb, mask=xmask)
            if self._q_train is False:
                assert y.dim() == 2
            dec = self.qldec(y, ctx=xenc, ctxmask=xmask[:, :xenc.size(1)])
            return dec

    m_train = Model(nlemb, nlenc, qldec, train=True)
    m_test = Model(nlemb, nlenc, qldec, train=False)
    if test:
        test_out = m_train(torch.tensor(nlsm.matrix[:5]), torch.tensor(qlsm.matrix[:5]))
        print("test_out.size() = {}".format(test_out.size()))
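
# The Luong-style dot attention wired into the decoder above reduces to a
# dot-product score followed by a masked softmax and a weighted sum over the
# encoder states. A plain-torch reference (a sketch, not q.att.DotAtt itself):
def dot_attention_reference(q_t, ctx, ctxmask=None):
    # q_t: (batsize, dim), ctx: (batsize, seqlen, dim)
    scores = torch.einsum("bd,bsd->bs", q_t, ctx)
    if ctxmask is not None:
        scores = scores.masked_fill(ctxmask == 0, float("-inf"))
    alphas = torch.softmax(scores, -1)          # attention weights
    summary = torch.einsum("bs,bsd->bd", alphas, ctx)
    return summary, alphas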
def run_normal_seqvae_toy(lr=0.001,
                          embdim=64,
                          encdim=100,
                          zdim=64,
                          batsize=50,
                          epochs=100,
                          ):
    # test
    vocsize = 100
    seqlen = 12
    wD = dict((chr(xi), xi) for xi in range(vocsize))

    # region encoder
    encoder_emb = q.WordEmb(embdim, worddic=wD)
    encoder_lstm = q.FastestLSTMEncoder(embdim, encdim)

    class EncoderNet(torch.nn.Module):
        def __init__(self, emb, core):
            super(EncoderNet, self).__init__()
            self.emb, self.core = emb, core

        def forward(self, x):
            embs, mask = self.emb(x)
            out, states = self.core(embs, mask, ret_states=True)
            top_state = states[-1][0][:, 0]
            # top_state = top_state.unsqueeze(1).repeat(1, out.size(1), 1)
            return top_state    # (batsize, encdim)

    encoder_net = EncoderNet(encoder_emb, encoder_lstm)
    encoder = Posterior(encoder_net, encdim, zdim)
    # endregion

    # region decoder
    decoder_emb = q.WordEmb(embdim, worddic=wD)
    decoder_lstm = q.LSTMCell(embdim + zdim, encdim)
    decoder_outlin = q.WordLinout(encdim, worddic=wD)

    class DecoderCell(torch.nn.Module):
        def __init__(self, emb, core, out, **kw):
            super(DecoderCell, self).__init__()
            self.emb, self.core, self.out = emb, core, out

        def forward(self, xs, z=None):
            embs, mask = self.emb(xs)
            core_inp = torch.cat([embs, z], 1)
            core_out = self.core(core_inp)
            out = self.out(core_out)
            return out

    decoder_cell = DecoderCell(decoder_emb, decoder_lstm, decoder_outlin)
    decoder = q.TFDecoder(decoder_cell)
    # endregion

    likelihood = Likelihood()
    vae = SeqVAE(encoder, decoder, likelihood)

    x = torch.randint(0, vocsize, (batsize, seqlen), dtype=torch.int64)
    ys = vae(x)

    optim = torch.optim.Adam(q.params_of(vae), lr=lr)
    x = torch.randint(0, vocsize, (batsize * 100, seqlen), dtype=torch.int64)
    dataloader = q.dataload(x, batch_size=batsize, shuffle=True)
    trainer = q.trainer(vae).on(dataloader).optimizer(optim).loss(4).epochs(epochs)
    trainer.run()
    print("done \n\n")
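
# The Posterior used above presumably parameterises a Gaussian q(z|x) from the
# encoder state; sampling z then goes through the standard reparameterisation
# trick so gradients flow back into the encoder. A minimal sketch with
# illustrative names (mu/logvar are not qelos attributes):
def reparameterize(mu, logvar):
    std = (0.5 * logvar).exp()                  # log-variance -> std deviation
    return mu + std * torch.randn_like(std)     # z = mu + sigma * eps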