예제 #1
0
파일: test_rnn.py 프로젝트: nilesh-c/qelos
    def test_mask(self):
        """Check a masked GRULayer: zero output on masked steps, and the
        returned final state equal to the output at the last unmasked step."""
        num_rows, num_steps, in_features = 5, 3, 4
        out_features = 6
        layer = q.GRULayer(in_features, out_features).return_final()
        inputs = Variable(
            torch.FloatTensor(
                np.random.random((num_rows, num_steps, in_features))))
        # One row per example; the assertions below treat 0 entries as masked.
        step_mask = Variable(torch.LongTensor([
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1],
            [1, 0, 0],
        ]))
        final, pred = layer(inputs, mask=step_mask)
        print(pred)
        self.assertEqual((num_rows, num_steps, out_features), pred.size())
        first_state = layer.get_states(0)[0]
        self.assertEqual((num_rows, out_features), first_state.size())
        pred_np = pred.data.numpy()
        final_np = final.data.numpy()

        # Outputs at masked positions are expected to be all zeros.
        for masked_out in (pred_np[0, 2, :], pred_np[4, 1:, :]):
            self.assertTrue(
                np.allclose(masked_out, np.zeros_like(masked_out)))

        # (row, index of that row's last unmasked step)
        for row, last_step in ((0, 1), (4, 0), (3, 2)):
            self.assertTrue(
                np.allclose(final_np[row, :], pred_np[row, last_step, :]))
예제 #2
0
파일: test_rnn.py 프로젝트: nilesh-c/qelos
 def test_shapes(self):
     """Verify output and state shapes of a plain GRULayer on random input."""
     num_rows, num_steps, in_features = 5, 3, 4
     out_features = 6
     layer = q.GRULayer(in_features, out_features)
     raw = np.random.random((num_rows, num_steps, in_features))
     inputs = Variable(torch.FloatTensor(raw))
     pred = layer(inputs)
     print(pred)
     self.assertEqual((num_rows, num_steps, out_features), pred.size())
     first_state = layer.get_states(0)[0]
     self.assertEqual((num_rows, out_features), first_state.size())
예제 #3
0
    def test_shapes(self):
        """Run an AttentionDecoder on random data and check that the output
        has shape (batsize, seqlen, vocsize) and sums to one on the last axis."""
        batsize, seqlen, inpdim = 5, 7, 8
        vocsize, embdim, encdim = 20, 9, 10
        ctxtoinitff = q.Forward(inpdim, encdim)
        coreff = q.Forward(encdim, encdim)
        # NOTE(review): initstategen is built here but never passed to the
        # decoder in this test.
        initstategen = q.Lambda(
            lambda *x, **kw: coreff(ctxtoinitff(x[1][:, -1, :])),
            register_modules=coreff)

        # Build the decoder parts one by one, then assemble them.
        attention = q.Attention().forward_gen(inpdim, encdim + embdim, encdim)
        embedder = nn.Embedding(vocsize, embdim)
        core = q.RecurrentStack(
            q.GRULayer(embdim, encdim),
            q.GRULayer(encdim, encdim),
            coreff,
        )
        smo = q.Stack(
            q.Forward(encdim + inpdim, encdim),
            q.Forward(encdim, vocsize),
            q.Softmax(),
            q.argmap.spec(0),
        )
        decoder = q.AttentionDecoder(
            attention=attention,
            embedder=embedder,
            core=core,
            smo=smo,
            ctx_to_smo=True,
            state_to_smo=True,
            decinp_to_att=True,
        )

        ctx = Variable(
            torch.FloatTensor(np.random.random((batsize, seqlen, inpdim))))
        # Zero the last two context positions of every row, and the last
        # three positions of rows 0 and 1.
        mask_np = np.ones((batsize, seqlen))
        mask_np[:, -2:] = 0
        mask_np[[0, 1], -3:] = 0
        ctxmask = Variable(torch.FloatTensor(mask_np))
        inp = Variable(
            torch.LongTensor(np.random.randint(0, vocsize, (batsize, seqlen))))

        decoded = decoder(inp, ctx, ctxmask)

        self.assertEqual((batsize, seqlen, vocsize), decoded.size())
        totals = np.sum(decoded.data.numpy(), axis=-1)
        self.assertTrue(np.allclose(totals, np.ones_like(totals)))
        print(decoded.size())
예제 #4
0
파일: test_rnn.py 프로젝트: nilesh-c/qelos
 def test_shapes(self):
     """Verify output and state shapes of an embedding -> GRU -> projection
     RecurrentStack fed with random token ids."""
     batsize, seqlen, vocsize, embdim, encdim = 5, 3, 20, 4, 6
     layers = [
         nn.Embedding(vocsize, embdim),
         q.GRULayer(embdim, encdim),
         q.Forward(encdim, vocsize),
         q.LogSoftmax(),
     ]
     stack = q.RecurrentStack(*layers)
     token_ids = np.random.randint(0, vocsize, (batsize, seqlen))
     data = Variable(torch.LongTensor(token_ids))
     pred = stack(data)
     print(pred)
     self.assertEqual((batsize, seqlen, vocsize), pred.size())
     first_state = stack.get_states(0)[0]
     self.assertEqual((batsize, encdim), first_state.size())
예제 #5
0
 def __init__(self, input_size, hidden_size, num_layers, num_classes):
     """Build the recurrent network selected by ``mode`` and a linear head.

     ``mode`` is not a parameter of this method; it is read from the
     enclosing/global scope and must be one of ``"qrnn"``, ``"nn"`` or
     ``"stack"``.

     Args:
         input_size: feature size given to the first recurrent layer.
         hidden_size: hidden size of every recurrent layer and the input
             size of the final linear layer.
         num_layers: total number of recurrent layers.
         num_classes: output size of the final linear layer.

     Raises:
         ValueError: if ``mode`` is not one of the supported values.
             Previously this left ``self.rnn`` unset, so the error only
             appeared later as an AttributeError.
     """
     super(RNN, self).__init__()
     self.hidden_size = hidden_size
     self.num_layers = num_layers
     if mode == "qrnn":
         tt.msg("using q.RNN")
         # The first cell takes the raw input; the rest are hidden -> hidden.
         cells = [GRUCell(input_size, hidden_size,
                          use_cudnn_cell=False, rec_batch_norm="main")]
         cells += [GRUCell(hidden_size, hidden_size)
                   for _ in range(num_layers - 1)]
         self.rnn = RecStack(*cells).to_layer().return_all()
     elif mode == "nn":
         tt.msg("using nn.RNN")
         self.rnn = nn.GRU(input_size,
                           hidden_size,
                           num_layers,
                           batch_first=True)
     elif mode == "stack":
         layers = [q.GRULayer(input_size, hidden_size)]
         layers += [q.GRULayer(hidden_size, hidden_size)
                    for _ in range(num_layers - 1)]
         self.rnn = q.RecurrentStack(*layers)
     else:
         raise ValueError(
             "unknown mode {!r}; expected 'qrnn', 'nn' or 'stack'".format(mode))
     self.fc = nn.Linear(hidden_size, num_classes)
예제 #6
0
파일: test_word.py 프로젝트: nilesh-c/qelos
    def test_masked_3D_data(self):
        """Compare ``self.linout``'s masked output with a manually computed
        masked product when its data is 3D and encoded by a GRULayer."""
        raw = np.random.random((7, 10, 3)).astype(dtype="float32")
        self.linout.data = q.val(raw).v
        self.linout.computer = q.GRULayer(3, 15).return_final("only")

        x = Variable(torch.randn(3, 15)).float()
        # Paired row / column indices of the mask entries set to 1.
        msk_nonzero_batches = [0, 0, 0, 1, 1, 2]
        msk_nonzero_values = [0, 2, 3, 2, 6, 5]
        msk = np.zeros((3, 7), dtype="int32")
        msk[msk_nonzero_batches, msk_nonzero_values] = 1
        print(msk)
        msk = Variable(torch.from_numpy(msk))
        out = self.linout(x, mask=msk)
        self.assertEqual(out.size(), (3, 7))

        # Reference: encode the data, multiply with x, then apply the mask.
        encoded = self.linout.computer(self.linout.data)
        expected = torch.matmul(x, encoded.t()) * msk.float()
        self.assertTrue(
            np.allclose(expected.data.numpy(), out.data.numpy()))
예제 #7
0
    def test_fast_context_decoder_shape(self):
        """Check that a ContextDecoder produces (batsize, seqlen, vocsize)
        outputs whose last axis sums to one."""
        batsize, seqlen, vocsize = 5, 4, 7
        embdim, encdim, outdim, ctxdim = 10, 16, 10, 8

        # Model: token embedder plus a GRU -> projection -> softmax core.
        embedder = nn.Embedding(vocsize, embdim, padding_idx=0)
        core = q.RecurrentStack(
            q.GRULayer(embdim + ctxdim, encdim),
            q.Forward(encdim, vocsize),
            q.Softmax(),
        )
        decoder = q.ContextDecoder(embedder, core)

        # Random token ids and one random context vector per example.
        token_ids = np.random.randint(0, vocsize, (batsize, seqlen))
        data = Variable(torch.LongTensor(token_ids))
        ctx = Variable(torch.FloatTensor(np.random.random((batsize, ctxdim))))

        decoded = decoder(data, ctx).data.numpy()
        # Shape check.
        self.assertEqual(decoded.shape, (batsize, seqlen, vocsize))
        # Probability check: every distribution sums to one.
        totals = np.sum(decoded, axis=-1)
        self.assertTrue(np.allclose(totals, np.ones_like(totals)))
예제 #8
0
파일: seq2seq.py 프로젝트: nilesh-c/qelos
def make_encoder(src_emb, embdim=100, dim=100, **kw):
    """Assemble the concatenating-bypass encoder as a ``q.RecurrentStack``.

    Wiring, as laid out in the layer list below:
        embedding  --> 1st BiGRU and top GRU
        1st BiGRU  --> 2nd BiGRU and top GRU
        2nd BiGRU  --> top GRU

    Args:
        src_emb: first layer of the stack; expected to yield
            (embeddings, mask) -- assumption taken from the original
            author's note, confirm against the embedder used.
        embdim: embedding size, used to size the top GRU's input.
        dim: hidden size passed to each GRU layer.
        **kw: accepted but not used by this function.

    Returns:
        The stack; per the original author's note its outputs are
        (all_states, mask, final_state).
    """
    layers = [
        src_emb,
        # Remember embeddings and mask for later layers.
        q.argsave.spec(emb=0, mask=1),
        q.argmap.spec(0, mask=["mask"]),
        q.BidirGRULayer(embdim, dim),
        # Remember the first BiGRU's output for the bypass connection.
        q.argsave.spec(bypass=0),
        q.argmap.spec(0, mask=["mask"]),
        q.BidirGRULayer(dim * 2, dim),
        # Gather 2nd BiGRU output, bypass and embeddings, then concatenate.
        q.argmap.spec(0, ["bypass"], ["emb"]),
        q.Lambda(lambda x, y, z: torch.cat([x, y, z], 1)),
        q.argmap.spec(0, mask=["mask"]),
        q.GRULayer(dim * 4 + embdim, dim).return_final(True),
        # Reorder the outputs to (all_states, mask, final_state).
        q.argmap.spec(1, ["mask"], 0),
    ]
    return q.RecurrentStack(*layers)
예제 #9
0
def main(
    lr=0.5,
    epochs=30,
    batsize=32,
    embdim=90,
    encdim=90,
    mode="cell",  # "fast" or "cell"
    wreg=0.0001,
    cuda=False,
    gpu=1,
):
    """Train an encoder-decoder on random integer sequences, then open a shell.

    The same random sequences serve as both input and target. After
    training, one forward pass is run on fresh random data and ``embed()``
    is called with the results available as local variables.

    Args:
        lr: learning rate given to the Adadelta optimizer.
        epochs: number of epochs passed to the trainer.
        batsize: batch size; also scales the amount of generated data.
        embdim: size of the token embeddings.
        encdim: size given to the SRU cells and the decoder core.
        mode: ``"fast"`` builds a ``q.AttentionDecoder``; any other value
            builds a ``q.AttentionDecoderCell`` turned into a decoder.
        wreg: value given to the optimizer as ``weight_decay``.
        cuda: whether to use CUDA.
        gpu: device index given to ``torch.cuda.set_device`` when ``cuda``
            is true.
    """
    if cuda:
        torch.cuda.set_device(gpu)
    usecuda = cuda
    vocsize = 50
    # create datasets tensor
    tt.tick("loading data")
    # batsize * 100 random sequences of length 16 over a vocabulary of 50
    sequences = np.random.randint(0, vocsize, (batsize * 100, 16))
    # wrap in dataset: first batsize * 80 rows for training, the rest for
    # validation; input and target are the same array
    dataset = q.TensorDataset(sequences[:batsize * 80],
                              sequences[:batsize * 80])
    validdataset = q.TensorDataset(sequences[batsize * 80:],
                                   sequences[batsize * 80:])
    dataloader = DataLoader(dataset=dataset, batch_size=batsize, shuffle=True)
    validdataloader = DataLoader(dataset=validdataset,
                                 batch_size=batsize,
                                 shuffle=False)
    tt.tock("data loaded")
    # model
    tt.tick("building model")
    # the same embedder instance is used by the encoder and the decoder
    embedder = nn.Embedding(vocsize, embdim)

    # encoder: embedder followed by four SRU layers
    encoder = q.RecurrentStack(
        embedder,
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer().return_final(),
    )
    if mode == "fast":
        decoder = q.AttentionDecoder(
            attention=q.Attention().forward_gen(encdim, encdim, encdim),
            embedder=embedder,
            core=q.RecurrentStack(q.GRULayer(embdim, encdim)),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            return_att=True)
    else:
        decoder = q.AttentionDecoderCell(
            attention=q.Attention().forward_gen(encdim, encdim + embdim,
                                                encdim),
            embedder=embedder,
            core=q.RecStack(
                q.GRUCell(embdim + encdim,
                          encdim,
                          use_cudnn_cell=False,
                          rec_batch_norm=None,
                          activation="crelu")),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            att_after_update=False,
            ctx_to_decinp=True,
            decinp_to_att=True,
            return_att=True,
        ).to_decoder()

    m = EncDec(encoder, decoder, mode=mode)

    # separate loss collections for training and validation
    losses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                         q.SeqAccuracy(ignore_index=None),
                         q.SeqElemAccuracy(ignore_index=None))
    validlosses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                              q.SeqAccuracy(ignore_index=None),
                              q.SeqElemAccuracy(ignore_index=None))

    optimizer = torch.optim.Adadelta(m.parameters(), lr=lr, weight_decay=wreg)
    tt.tock("model built")

    # The batch transformer maps (x, y) to (x, y[:, :-1], y[:, 1:]):
    # presumably encoder input, decoder input and the one-step-shifted
    # target -- confirm against q.train.
    q.train(m).cuda(usecuda).train_on(dataloader, losses)\
        .set_batch_transformer(lambda x, y: (x, y[:, :-1], y[:, 1:]))\
        .valid_on(validdataloader, validlosses)\
        .optimizer(optimizer).clip_grad_norm(2.)\
        .train(epochs)

    # fresh random sequences of length 20 for a manual forward pass
    testdat = np.random.randint(0, vocsize, (batsize, 20))
    testdata = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    testdata_out = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    # NOTE(review): the `and False` makes this branch unreachable; it would
    # reverse testdata along dimension 1.
    if mode == "cell" and False:
        inv_idx = torch.arange(testdata.size(1) - 1, -1, -1).long()
        testdata = testdata.index_select(1, inv_idx)
    probs, attw = m(testdata, testdata_out[:, :-1])

    # not called in this function; presumably meant for use from the
    # interactive shell opened below
    def plot(x):
        sns.heatmap(x)
        plt.show()

    # presumably IPython's embed(); the local names above stay accessible
    # from the shell, so they should not be renamed casually
    embed()
예제 #10
0
def main(
    # Hyper Parameters
    sequence_length=28,
    input_size=28,
    hidden_size=128,
    num_layers=2,
    batch_size=5,
    num_epochs=2,
    learning_rate=0.01,
    ctx_to_decinp=False,
    gpu=False,
    mode="stack",  # "nn" or "qrnn" or "stack"
    trivial=False,
):
    """Train and evaluate an image-to-character-sequence model on MNIST.

    Each image is reshaped to ``(sequence_length, input_size)`` and encoded.
    A context decoder is trained to emit the character sequence that
    ``number2charseq`` produces for the label. After training, sequence-level
    accuracy on the test split is printed and the model's state dict is
    saved to ``rnn.pkl``.

    Args:
        sequence_length: second dimension images are reshaped to.
        input_size: last dimension images are reshaped to; also the
            encoder's input size.
        hidden_size: encoder hidden size, also used as the decoder's
            embedding size.
        num_layers: number of encoder layers.
        batch_size: batch size for both data loaders.
        num_epochs: number of passes over the training set.
        learning_rate: learning rate given to the Adadelta optimizer.
        ctx_to_decinp: passed through to ``q.ContextDecoder``; when true
            the decoder GRU's input size is ``embdim + hidden_size``.
        gpu: when true, the model, criterion and data are moved to CUDA.
        mode: encoder implementation, passed to ``Encoder``.
        trivial: when true, an ``IdxToSeq`` model is used and the labels
            are fed as input in place of the images.
    """
    tt.msg("using q: {}".format(mode))
    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

    test_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    if gpu:
        q.var.all_cuda = True
    encoder = Encoder(input_size, hidden_size, num_layers, mode=mode)
    embdim = hidden_size
    decdim = 100
    # Linear map from the encoder context to the decoder's initial state.
    initstate = nn.Linear(hidden_size, decdim)
    decoder = q.ContextDecoder(*[
        nn.Embedding(256, embdim),
        q.RecurrentStack(
            q.GRULayer((embdim + hidden_size if ctx_to_decinp else embdim),
                       decdim), nn.Linear(decdim, 256), nn.LogSoftmax())
    ],
                               ctx_to_h0=initstate,
                               ctx_to_decinp=ctx_to_decinp)

    if trivial:
        encdec = IdxToSeq(decoder, embdim=hidden_size)
    else:
        encdec = ImgToSeq(encoder, decoder)
    if gpu:
        encdec.cuda()

    # Loss and Optimizer
    # The decoder ends in LogSoftmax, so a sequence NLL loss is used.
    # Index 0 is ignored by the loss, matching the zero-target masking in
    # the evaluation loop below.
    criterion = q.SeqNLLLoss(ignore_index=0)
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adadelta(encdec.parameters(), lr=learning_rate)

    tt.msg("training")
    # Train the Model
    for epoch in range(num_epochs):
        tt.tick()
        btt = ticktock("batch")
        btt.tick()
        for i, (images, labels) in enumerate(train_loader):
            images = q.var(images.view(-1, sequence_length,
                                       input_size)).cuda(crit=gpu).v
            labels = q.var(labels).cuda(crit=gpu).v
            tgt = number2charseq(labels)
            if trivial:
                images = labels

            # Forward + Backward + Optimize
            # The decoder input drops the last target symbol and the loss
            # target drops the first, so each step predicts the next one.
            optimizer.zero_grad()
            outputs = encdec(images, tgt[:, :-1])
            loss = criterion(outputs, tgt[:, 1:])
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                btt.tock("100 batches done")
                # Sum of per-parameter gradient L2 norms, for logging only.
                tgn = 0
                for param in encdec.parameters():
                    tgn = tgn + torch.norm(param.grad, 2)
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, TGN: %.8f' %
                      (epoch + 1, num_epochs, i + 1, len(train_dataset) //
                       batch_size, loss.data[0], tgn.cpu().data.numpy()[0]))
                btt.tick()
        tt.tock("epoch {} done {}".format(epoch, loss.data[0]))
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length,
                                   input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        if trivial:
            images = labels
        tgt = number2charseq(labels)
        outputs = encdec(images, tgt[:, :-1])
        _, predicted = torch.max(outputs.data, 2)
        if tgt.is_cuda:
            tgt = tgt.cpu()
        if predicted.is_cuda:
            predicted = predicted.cpu()
        tgt = tgt[:, 1:].data.numpy()
        predicted = predicted.numpy()

        # A sequence counts as correct when every position with a non-zero
        # target is predicted exactly; zero-target positions always match.
        tgtmask = tgt == 0
        eq = predicted == tgt
        eq = eq | tgtmask
        eq = np.all(eq, axis=1)
        correct += eq.sum()
        total += labels.size(0)

    print('Test Accuracy of the model on the 10000 test images: %d %%' %
          (100. * correct / total))

    # Save the Model
    torch.save(encdec.state_dict(), 'rnn.pkl')