def test_mask_t(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    mask_t = torch.tensor([1, 1, 0, 1, 0])
    c_tm1 = torch.randn(1, 10)
    h_tm1 = torch.randn(1, 10)
    lstm.c_0 = torch.tensor(c_tm1)
    lstm.y_0 = torch.tensor(h_tm1)
    y_t = lstm(x_t, mask_t=mask_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # where mask_t == 0 (examples 2 and 4) the cell must carry over the
    # previous output; where mask_t == 1 it must compute a new one
    self.assertTrue(
        np.allclose(h_tm1[0].detach().numpy(), y_t[2].detach().numpy()))
    self.assertFalse(
        np.allclose(h_tm1[0].detach().numpy(), y_t[1].detach().numpy()))
    self.assertTrue(
        np.allclose(h_tm1[0].detach().numpy(), y_t[4].detach().numpy()))
    # the stored recurrent state must be carried over in the same way
    self.assertTrue(
        np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[2].detach().numpy()))
    self.assertFalse(
        np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[1].detach().numpy()))
    self.assertTrue(
        np.allclose(h_tm1[0].detach().numpy(), lstm.y_tm1[4].detach().numpy()))
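# A minimal sketch of the carry-over rule the test above checks (illustration
# only, not q.LSTMCell's actual implementation): with a binary per-example
# mask, masked-out rows keep their previous recurrent output.
def _masked_update_sketch(y_new, y_tm1, mask_t):
    mask = mask_t.float().unsqueeze(-1)       # (batsize, 1)
    return mask * y_new + (1 - mask) * y_tm1  # keep old state where mask == 0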
def test_dropout_rec(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10, dropout_rec=0.5)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = torch.tensor(c_tm1)
    lstm.y_0 = torch.tensor(y_tm1)
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    self.assertTrue(lstm.training)
    lstm.train(False)
    self.assertFalse(lstm.training)
    # in eval mode recurrent dropout is disabled, so two runs from the same
    # initial state must agree
    q.batch_reset(lstm)
    pred1 = lstm(x_t)
    q.batch_reset(lstm)
    pred2 = lstm(x_t)
    self.assertTrue(
        np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
    lstm.train(True)
    self.assertTrue(lstm.training)
    # in training mode the dropout masks are resampled, so two runs must differ
    q.batch_reset(lstm)
    pred1 = lstm(x_t)
    q.batch_reset(lstm)
    pred2 = lstm(x_t)
    self.assertFalse(
        np.allclose(pred1.detach().numpy(), pred2.detach().numpy()))
def test_zoneout(self):
    batsize = 5
    q.LSTMCell.debug = False
    lstm = q.LSTMCell(9, 10, zoneout=0.5)
    x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
    h_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    c_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    lstm.set_init_states(c_tm1, h_tm1)
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.data.numpy().shape)
    self.assertTrue(lstm.training)
    lstm.train(mode=False)
    self.assertFalse(lstm.training)
    lstm.reset_state()
    pred1 = lstm(x_t)
    lstm.reset_state()
    pred2 = lstm(x_t)
    # must be equal in prediction mode
    self.assertTrue(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
    lstm.train(mode=True)
    self.assertTrue(lstm.training)
    lstm.reset_state()
    pred1 = lstm(x_t)
    lstm.reset_state()
    pred2 = lstm(x_t)
    # must not be equal in training mode
    self.assertFalse(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
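# Zoneout (Krueger et al., 2016) keeps each hidden unit from the previous step
# with probability `zoneout` during training, and uses the expectation at test
# time; a minimal sketch of that rule (illustration only, not q.LSTMCell's
# actual implementation), which explains the two assertions above:
def _zoneout_sketch(h_new, h_tm1, zoneout, training):
    if training:
        b = torch.bernoulli(torch.full_like(h_new, zoneout))
        return b * h_tm1 + (1 - b) * h_new          # stochastic: runs differ
    return zoneout * h_tm1 + (1 - zoneout) * h_new  # expectation: deterministic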
def test_lstm_layer_shapes(self):
    batsize = 5
    seqlen = 7
    q.LSTMCell.debug = False
    lstm = q.LSTMCell(9, 10)
    lstm = lstm.to_layer().return_all()
    x = Variable(torch.FloatTensor(np.random.random((batsize, seqlen, 9))))
    y = lstm(x)
    self.assertEqual((batsize, seqlen, 10), y.data.numpy().shape)
def test_lstm_shapes_non_cudnn(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10, use_cudnn_cell=False)
    x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
    h_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    c_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
    lstm.set_init_states(c_tm1, h_tm1)
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.data.numpy().shape)
def test_lstm_shapes(self):
    batsize = 5
    lstm = q.LSTMCell(9, 10)
    x_t = torch.randn(batsize, 9)
    c_tm1 = torch.randn(1, 10)
    y_tm1 = torch.randn(1, 10)
    lstm.c_0 = torch.tensor(c_tm1)
    lstm.y_0 = torch.tensor(y_tm1)
    y_t = lstm(x_t)
    self.assertEqual((batsize, 10), y_t.detach().numpy().shape)
    # after one step, the stored recurrent output must equal the returned output
    self.assertTrue(
        np.allclose(lstm.y_tm1.detach().numpy(), y_t.detach().numpy()))
    q.batch_reset(lstm)
def test_it(self):
    x = np.random.randint(0, 100, (1000, 7))
    y_inp = x[:, :-1]
    y_out = x[:, 1:]
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                 None, decoder_out)
    # teacher-forcing decoder: feeds the gold token at every step
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    # assertTrue(y.size(), ...) would always pass (the second argument is the
    # failure message); assertEqual actually checks the shape
    self.assertEqual(y.size(), (1000, 7, 100))
    # endregion
def test_it(self):
    D = "<MASK> [RED] NT(START) NT(a) T(b) NT(c) T(d) T(e) NT(f) T(g) T(h) T(i)"
    D = dict(zip(D.split(), range(len(D.split()))))
    # action per token: 2 = reduce, 1 = open a non-terminal, 0 = terminal
    tok2act = {
        k: (2 if k == "[RED]" else 1 if k[:2] == "NT" else 0) for k in D
    }

    class CustomCombiner(StackCellCombiner):
        def forward(self, _x, mask):
            # masked mean over the stack elements
            ret = (_x * mask.unsqueeze(-1).float()).sum(1) \
                  / mask.float().sum(1).unsqueeze(-1).clamp_min(1e-6)
            ret = ret.detach()  # TODO: for grad debugging
            return ret

    class CustomWordLinout(q.WordLinout):
        def update(self, _):
            pass

    class Tok2Act(torch.nn.Module):
        """Maps token ids to their stack actions via a lookup buffer."""
        def __init__(self, t2a, D):
            super(Tok2Act, self).__init__()
            self.D = D
            t2a_ = torch.zeros(max(D.values()) + 1).long()
            for k, v in t2a.items():
                t2a_[D[k]] = v
            self.register_buffer("t2a", t2a_)

        def forward(self, _x):
            return self.t2a[_x]

    embdim = 4
    coredim = 5
    emb = q.WordEmb(embdim, worddic=D)
    core = q.LSTMCell(embdim, coredim, dropout_rec=.1)
    # combiner = BasicCombiner(embdim)
    combiner = CustomCombiner()
    att = BasicAttention()
    out = CustomWordLinout(coredim * 2, worddic=D)
    tok2act = Tok2Act(tok2act, D)
    cell = StackCell(emb=emb, tok2act=tok2act, core=core,
                     combiner=combiner, att=att, out=out)
    ctx = torch.randn(2, 6, coredim)
    cell.save_ctx(ctx)
    ex1 = "NT(START) NT(a) T(b) NT(c) T(d) T(e) [RED] NT(f) T(g) T(h) [RED] T(i) [RED]"
    ex2 = "NT(START) NT(a) NT(c) T(d) T(e) [RED] [RED]"
    x1 = [D[exi] for exi in ex1.split()] + [0]
    x2 = [D[exi] for exi in ex2.split()]
    x2 = x2 + [0] * (len(x1) - len(x2))  # pad the shorter example with <MASK>
    x = torch.tensor([x1, x2])
    cell._debug_embs = torch.nn.Parameter(torch.zeros(2, len(x1), embdim))
    ys = []
    for i in range(len(x[0])):
        y = cell(x[:, i])
        ys.append(y)
    # print(cell._debug_embs)
    print(cell._debug_embs.size())
    l = ys[11][0].sum()
    l.backward()
    print(cell._debug_embs.grad)
    print(cell._stack)
def run_seq2seq_(lr=0.001,
                 batsize=32,
                 evalbatsize=256,
                 epochs=100,
                 warmup=5,
                 embdim=50,
                 encdim=100,
                 numlayers=2,
                 dropout=.0,
                 wreg=1e-6,
                 cuda=False,
                 gpu=0):
    settings = locals().copy()
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")

    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}
    print("Some examples:")
    for i in range(5):
        print(f"{xsm[i]}\n ->{ysm[i]}\n"
              f" -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}")
    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})

    # split off the last 500 pre-test examples as a development set
    devstart = teststart - 500
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}")
    tloader = torch.utils.data.DataLoader(trainds, batch_size=batsize, shuffle=True)
    vloader = torch.utils.data.DataLoader(valds, batch_size=evalbatsize, shuffle=False)
    xloader = torch.utils.data.DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    tt.tock("data loaded")

    # model: bidirectional LSTM encoder, multi-layer LSTM decoder with Luong attention
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim, *encdims[1:], bidir=True, dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1], decdims[i], dropout_in=dropout, dropout_rec=dropout)
        for i in range(1, len(decdims))
    ])
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb, core=dec_core, out=yout, dropout=dropout)
    decoder = q.TFDecoder(dec_cell)                      # teacher forcing for training
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)   # free running for evaluation
    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)

    # smoke-test both models on one batch
    tt.tick("running a batch")
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)
    tt.tock(f"ran a batch: {test_y.size()}")

    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [q.CELoss(mode="logits", ignore_index=0),
               q.Accuracy(ignore_index=0),
               q.SeqAccuracy(ignore_index=0)]
    xlosses = [q.CELoss(mode="logits", ignore_index=0),
               q.Accuracy(ignore_index=0),
               q.SeqAccuracy(ignore_index=0)]
    tlosses = [q.LossWrapper(l) for l in tlosses]
    vlosses = [q.LossWrapper(l) for l in xlosses]
    xlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=tloader,
                        optim=optim, losses=tlosses, device=device)
    devloop = partial(q.test_epoch, model=testm, dataloader=vloader,
                      losses=vlosses, device=device)
    testloop = partial(q.test_epoch, model=testm, dataloader=xloader,
                       losses=xlosses, device=device)

    # reduce the learning rate when dev accuracy plateaus; stop once it bottoms out
    lrplateau = q.util.ReduceLROnPlateau(optim, mode="max", factor=.1,
                                         patience=3, cooldown=1, warmup=warmup,
                                         threshold=0., verbose=True, eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    stoptrain = [lambda: all(pg["lr"] <= 1e-7 for pg in optim.param_groups)]

    tt.tick("training")
    q.run_training(trainloop, _devloop, max_epochs=epochs, check_stop=stoptrain)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")
    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())
    return vlosses[1].get_epoch_error()
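# Minimal entry point so the script can be run directly; a sketch assuming the
# defaults above are acceptable (a CLI argument parser could be layered on top
# instead, but that is not part of the original function).
if __name__ == "__main__":
    run_seq2seq_()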