Exemple #1
0
    def test_mask_t(self):
        """Step mask on an LSTM cell: masked-out batch rows keep the previous state."""
        n_batch = 5
        cell = q.LSTMCell(9, 10)
        inp = torch.randn(n_batch, 9)
        step_mask = torch.tensor([1, 1, 0, 1, 0])
        init_c = torch.randn(1, 10)
        init_h = torch.randn(1, 10)
        cell.c_0 = torch.tensor(init_c)
        cell.y_0 = torch.tensor(init_h)
        out = cell(inp, mask_t=step_mask)
        self.assertEqual((n_batch, 10), out.detach().numpy().shape)

        prev_h = init_h[0].detach().numpy()
        # Rows 2 and 4 are masked out -> their output equals the initial
        # hidden state; row 1 is active -> its output must have changed.
        for masked_idx in (2, 4):
            self.assertTrue(
                np.allclose(prev_h, out[masked_idx].detach().numpy()))
        self.assertFalse(np.allclose(prev_h, out[1].detach().numpy()))

        # The stored recurrent state follows the same masking rule.
        for masked_idx in (2, 4):
            self.assertTrue(
                np.allclose(prev_h,
                            cell.y_tm1[masked_idx].detach().numpy()))
        self.assertFalse(
            np.allclose(prev_h, cell.y_tm1[1].detach().numpy()))
Exemple #2
0
    def test_dropout_rec(self):
        """Recurrent dropout: deterministic in eval mode, stochastic in train mode."""
        n_batch = 5
        cell = q.LSTMCell(9, 10, dropout_rec=0.5)
        inp = torch.randn(n_batch, 9)
        init_c = torch.randn(1, 10)
        init_y = torch.randn(1, 10)
        cell.c_0 = torch.tensor(init_c)
        cell.y_0 = torch.tensor(init_y)
        out = cell(inp)
        self.assertEqual((5, 10), out.detach().numpy().shape)

        # A freshly constructed module starts in training mode; switch to eval.
        self.assertEqual(cell.training, True)
        cell.train(False)
        self.assertEqual(cell.training, False)

        q.batch_reset(cell)
        first = cell(inp)
        q.batch_reset(cell)
        second = cell(inp)

        # Dropout is disabled in eval mode, so two identical runs agree.
        self.assertTrue(
            np.allclose(first.detach().numpy(), second.detach().numpy()))

        cell.train(True)
        self.assertEqual(cell.training, True)

        q.batch_reset(cell)
        first = cell(inp)
        q.batch_reset(cell)
        second = cell(inp)

        # Dropout is active in training mode, so the two runs must differ.
        self.assertFalse(
            np.allclose(first.detach().numpy(), second.detach().numpy()))
Exemple #3
0
 def test_zoneout(self):
     """Zoneout regularization: stochastic while training, deterministic in eval mode."""
     batsize = 5
     q.LSTMCell.debug = False
     lstm = q.LSTMCell(9, 10, zoneout=0.5)
     x_t = Variable(torch.FloatTensor(np.random.random((batsize, 9))))
     h_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
     c_tm1 = Variable(torch.FloatTensor(np.random.random((batsize, 10))))
     lstm.set_init_states(c_tm1, h_tm1)
     y_t = lstm(x_t)
     self.assertEqual((5, 10), y_t.data.numpy().shape)
     # A freshly built module is in training mode by default.
     self.assertEqual(lstm.training, True)
     lstm.train(mode=False)
     self.assertEqual(lstm.training, False)
     lstm.reset_state()
     pred1 = lstm(x_t)
     lstm.reset_state()
     pred2 = lstm(x_t)
     # must be equal in prediction mode (zoneout disabled)
     self.assertTrue(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
     lstm.train(mode=True)
     self.assertEqual(lstm.training, True)
     lstm.reset_state()
     pred1 = lstm(x_t)
     lstm.reset_state()
     pred2 = lstm(x_t)
     # must not be equal in training mode (zoneout is stochastic)
     self.assertFalse(np.allclose(pred1.data.numpy(), pred2.data.numpy()))
Exemple #4
0
 def test_lstm_layer_shapes(self):
     """Cell wrapped as a layer returns one output vector per timestep."""
     n_batch, n_steps = 5, 7
     q.LSTMCell.debug = False
     layer = q.LSTMCell(9, 10).to_layer().return_all()
     seq = Variable(
         torch.FloatTensor(np.random.random((n_batch, n_steps, 9))))
     out = layer(seq)
     self.assertEqual((n_batch, n_steps, 10), out.data.numpy().shape)
Exemple #5
0
 def test_lstm_shapes_non_cudnn(self):
     """The non-cuDNN cell implementation produces the expected output shape."""
     n_batch = 5
     cell = q.LSTMCell(9, 10, use_cudnn_cell=False)
     inp = Variable(torch.FloatTensor(np.random.random((n_batch, 9))))
     init_h = Variable(torch.FloatTensor(np.random.random((n_batch, 10))))
     init_c = Variable(torch.FloatTensor(np.random.random((n_batch, 10))))
     cell.set_init_states(init_c, init_h)
     out = cell(inp)
     self.assertEqual((5, 10), out.data.numpy().shape)
Exemple #6
0
    def test_lstm_shapes(self):
        """One step yields a (batch, dim) output that is stored as the new state."""
        n_batch = 5
        cell = q.LSTMCell(9, 10)
        inp = torch.randn(n_batch, 9)
        init_c = torch.randn(1, 10)
        init_y = torch.randn(1, 10)
        cell.c_0 = torch.tensor(init_c)
        cell.y_0 = torch.tensor(init_y)

        out = cell(inp)
        self.assertEqual((5, 10), out.detach().numpy().shape)

        # The stored recurrent output equals what the step just returned.
        self.assertTrue(
            np.allclose(cell.y_tm1.detach().numpy(),
                        out.detach().numpy()))

        q.batch_reset(cell)
Exemple #7
0
    def test_it(self):
        """Teacher-forced decoding over a toy vocabulary yields (batch, seq, vocab) logits."""
        x = np.random.randint(0, 100, (1000, 7))
        # Toy vocabulary: 100 single-character "words".
        wD = {chr(xi): xi for xi in range(100)}

        ctx = torch.randn(1000, 8, 30)

        decoder_emb = q.WordEmb(20, worddic=wD)
        decoder_lstm = q.LSTMCell(20, 30)
        decoder_att = q.DotAttention()
        decoder_out = q.WordLinout(60, worddic=wD)

        decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                     None, decoder_out)
        decoder_tf = q.TFDecoder(decoder_cell)

        y = decoder_tf(torch.tensor(x), ctx=ctx)

        # BUG FIX: the original used assertTrue(y.size(), (1000, 7, 100)),
        # which treats the expected shape as the failure *message* and can
        # never fail as long as y.size() is truthy. Assert equality instead.
        self.assertEqual((1000, 7, 100), tuple(y.size()))


# endregion
    def test_it(self):
        """Run StackCell over two bracketed action sequences and inspect gradients.

        Builds a tiny transition vocabulary, feeds two [RED]-terminated
        sequences through the cell one timestep at a time, then backprops
        from one output to check gradient flow into the debug embeddings
        and print the final stack state.
        """
        # Vocabulary string -> {token: index}; "<MASK>" gets index 0.
        # NT(...) opens a subtree, T(...) is a terminal, [RED] reduces
        # (closes) the current subtree.
        D = "<MASK> [RED] NT(START) NT(a) T(b) NT(c) T(d) T(e) NT(f) T(g) T(h) T(i)"
        D = dict(zip(D.split(), range(len(D.split()))))
        # Action code per token: 2 = reduce, 1 = open non-terminal, 0 = terminal.
        tok2act = {
            k: (2 if k == "[RED]" else 1 if k[:2] == "NT" else 0)
            for k in D
        }

        class CustomCombiner(StackCellCombiner):
            # Masked mean over the stacked vectors; result is detached so
            # no gradient flows back through the combiner (see TODO).
            def forward(self, _x, mask):
                ret = (_x * mask.unsqueeze(-1).float()).sum(
                    1) / mask.float().sum(1).unsqueeze(-1).clamp_min(1e-6)
                ret = ret.detach()  # TODO: for grad debugging
                return ret

        class CustomWordLinout(q.WordLinout):
            # No-op update so the output layer is left untouched in this test.
            def update(self, _):
                pass

        class Tok2Act(torch.nn.Module):
            # Lookup-table module mapping token ids to their action codes.
            def __init__(self, t2a, D):
                super(Tok2Act, self).__init__()
                self.D = D
                t2a_ = torch.zeros(max(D.values()) + 1).long()
                for k, v in t2a.items():
                    t2a_[D[k]] = v
                self.register_buffer("t2a", t2a_)

            def forward(self, _x):
                return self.t2a[_x]

        embdim = 4
        coredim = 5
        emb = q.WordEmb(embdim, worddic=D)
        core = q.LSTMCell(embdim, coredim, dropout_rec=.1)
        # combiner = BasicCombiner(embdim)
        combiner = CustomCombiner()
        att = BasicAttention()
        out = CustomWordLinout(coredim * 2, worddic=D)
        tok2act = Tok2Act(tok2act, D)

        cell = StackCell(emb=emb,
                         tok2act=tok2act,
                         core=core,
                         combiner=combiner,
                         att=att,
                         out=out)
        # Random encoder context for the attention: batch of 2, 6 positions.
        ctx = torch.randn(2, 6, coredim)
        cell.save_ctx(ctx)

        # Two transition sequences; the shorter one is right-padded with
        # index 0 (<MASK>) to the length of the longer one.
        ex1 = "NT(START) NT(a) T(b) NT(c) T(d) T(e) [RED] NT(f) T(g) T(h) [RED] T(i) [RED]"
        ex2 = "NT(START) NT(a) NT(c) T(d) T(e) [RED] [RED]"
        x1 = [D[exi] for exi in ex1.split()] + [0]
        x2 = [D[exi] for exi in ex2.split()]
        x2 = x2 + [0] * (len(x1) - len(x2))
        x = torch.tensor([x1, x2])

        # Trainable dummy embeddings used only to observe gradients below.
        cell._debug_embs = torch.nn.Parameter(torch.zeros(2, len(x1), embdim))

        # Feed the batch one timestep at a time, collecting the outputs.
        ys = []
        for i in range(len(x[0])):
            y = cell(x[:, i])
            ys.append(y)

        # print(cell._debug_embs)
        print(cell._debug_embs.size())
        # Backprop from the first example's output at timestep 11.
        l = ys[11][0].sum()
        l.backward()

        print(cell._debug_embs.grad)
        print(cell._stack)
Exemple #9
0
def run_seq2seq_(
    lr=0.001,
    batsize=32,
    evalbatsize=256,
    epochs=100,
    warmup=5,
    embdim=50,
    encdim=100,
    numlayers=2,
    dropout=.0,
    wreg=1e-6,
    cuda=False,
    gpu=0,
):
    """Train and evaluate an LSTM seq2seq model on LC-QuAD.

    Args:
        lr: Adam learning rate.
        batsize: training batch size.
        evalbatsize: validation/test batch size.
        epochs: maximum number of training epochs.
        warmup: warmup epochs for the LR-on-plateau scheduler.
        embdim: source/target word-embedding dimension.
        encdim: encoder hidden size; the decoder reuses the same size.
        numlayers: number of layers for encoder and decoder alike.
        dropout: dropout rate shared across the model.
        wreg: Adam weight decay (L2 regularization).
        cuda: run on GPU when True.
        gpu: CUDA device index used when `cuda` is True.

    Returns:
        The epoch error of the second validation loss (accuracy) wrapper.
    """
    settings = locals().copy()  # must run first: captures only the arguments
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")

    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}

    print("Some examples:")
    for i in range(5):
        print(
            f"{xsm[i]}\n ->{ysm[i]}\n -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}"
        )

    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})

    # Carve the last 500 pre-test examples out as the validation split.
    devstart = teststart - 500
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(
        f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}"
    )

    tloader = torch.utils.data.DataLoader(trainds,
                                          batch_size=batsize,
                                          shuffle=True)
    vloader = torch.utils.data.DataLoader(valds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    xloader = torch.utils.data.DataLoader(testds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    tt.tock("data loaded")

    # model
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    # Bidirectional encoder: each direction gets half the hidden size.
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim,
                         *encdims[1:],
                         bidir=True,
                         dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1],
                   decdims[i],
                   dropout_in=dropout,
                   dropout_rec=dropout) for i in range(1, len(decdims))
    ])
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb,
                                      core=dec_core,
                                      out=yout,
                                      dropout=dropout)
    decoder = q.TFDecoder(dec_cell)
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)

    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)

    # test model
    tt.tick("running a batch")
    # BUG FIX: `iter(loader).next()` is Python-2-only and raises
    # AttributeError on Python 3; use the builtin next() instead.
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)
    tt.tock(f"ran a batch: {test_y.size()}")

    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    xlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    tlosses = [q.LossWrapper(loss) for loss in tlosses]
    vlosses = [q.LossWrapper(loss) for loss in xlosses]
    xlosses = [q.LossWrapper(loss) for loss in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=tloader,
                        optim=optim,
                        losses=tlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=testm,
                      dataloader=vloader,
                      losses=vlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=testm,
                       dataloader=xloader,
                       losses=xlosses,
                       device=device)

    # Scheduler steps on validation accuracy; training stops once the LR
    # has decayed below 1e-7 for every parameter group.
    lrplateau = q.util.ReduceLROnPlateau(optim,
                                         mode="max",
                                         factor=.1,
                                         patience=3,
                                         cooldown=1,
                                         warmup=warmup,
                                         threshold=0.,
                                         verbose=True,
                                         eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    stoptrain = [lambda: all([pg["lr"] <= 1e-7 for pg in optim.param_groups])]

    tt.tick("training")
    q.run_training(trainloop,
                   _devloop,
                   max_epochs=epochs,
                   check_stop=stoptrain)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")

    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())

    return vlosses[1].get_epoch_error()