Example 1
    def loadvalue(self, path, dim, indim=None, maskid=True, rareid=True):
        # TODO: nonstandard mask and rareid?
        tt = ticktock(self.__class__.__name__)
        tt.tick()
        # load weights
        if path not in self.loadcache:
            W = np.load(path + ".npy")
        else:
            W = self.loadcache[path][0]

        # load words
        if path not in self.loadcache:
            words = pkl.load(open(path + ".words", "rb"))
        else:
            words = self.loadcache[path][1]

        # cache
        if self.useloadcache:
            self.loadcache[path] = (W, words)

        # adapt
        if indim is not None:
            W = W[:indim, :]
        if rareid:
            # prepend a zero row for the rare token
            W = np.concatenate([np.zeros_like(W[0, :])[np.newaxis, :], W],
                               axis=0)
        if maskid:
            # prepend a zero row for the mask token (this one ends up at row 0)
            W = np.concatenate([np.zeros_like(W[0, :])[np.newaxis, :], W],
                               axis=0)
        tt.tock("vectors loaded")
        tt.tick()

        # dictionary
        D = OrderedDict()
        i = 0
        if maskid:
            D[self.masktoken] = i
            i += 1
        if rareid:
            D[self.raretoken] = i
            i += 1
        wordset = set(words)
        for j, word in enumerate(words):
            if indim is not None and j >= indim:
                break
            if self.trylowercase and word.lower() not in wordset:
                word = word.lower()
            D[word] = i
            i += 1
        tt.tock("dictionary created")
        return W, D
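
A minimal usage sketch, assuming a surrounding embedding class (WordVecs here is a made-up name) that exposes loadvalue along with masktoken/raretoken attributes; the path and dimensions are illustrative:

emb = WordVecs()   # hypothetical owner class of loadvalue
W, D = emb.loadvalue("../data/glove.50d", 50, indim=4000)
# W has indim + 2 rows: zero vectors for the mask token (row 0) and the
# rare token (row 1) are prepended ahead of the first 4000 word vectors;
# D maps masktoken -> 0, raretoken -> 1, then each kept word to its row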
Example 2
    def __init__(self, model):
        super(train, self).__init__()
        self.model = model
        self.epochs = None
        self.current_epoch = 0
        self.trainlosses = None
        self.validlosses = None
        self.usecuda = False
        self.cudaargs = ([], {})
        self.optim = None
        self.transform_batch = None
        self.traindataloader = None
        self.validdataloader = None
        self.tt = ticktock("trainer")
        # long API
        self._clip_grad_norm = None
        # early stopping
        self._earlystop = False
        self._earlystop_criterium = None
        self._earlystop_selector = None
        self._earlystop_select_history = None
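
These fields back a fluent configuration API; Example 4 below exercises it roughly as follows (transform stands in for the batch-transformer lambda used there):

q.train(rnn).train_on(train_loader, criterion).cuda(gpu)\
    .optimizer(optimizer).set_batch_transformer(transform)\
    .train(num_epochs)

Each chained call presumably fills one of the attributes initialized here (traindataloader, optim, transform_batch, usecuda, ...) before .train() runs the loop shown in Example 5.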
Example 3
from __future__ import print_function
import torch
from torch import nn
import qelos as q
import numpy as np
from qelos.util import argprun, ticktock
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader
from IPython import embed
from matplotlib import pyplot as plt
import seaborn as sns

tt = ticktock("script")


class EncDec(nn.Module):
    def __init__(self, encoder, decoder, mode="fast"):
        super(EncDec, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.mode = mode

    def forward(self, srcseq, tgtseq):
        # encode the source into per-step encodings and a final summary state
        final, enc = self.encoder(srcseq)
        if self.mode == "fast":
            # "fast" mode seeds the decoder with a single initial state
            self.decoder.set_init_states(final)
        else:
            # otherwise the final state initializes both decoder states
            self.decoder.set_init_states(final, final)
        dec = self.decoder(tgtseq, enc)
        return dec
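
The contract EncDec expects from its two parts, demonstrated with toy stand-ins (these classes are illustrative placeholders, not qelos modules):

class ToyEncoder(nn.Module):
    # must return (final_state, per_step_encodings), the pair forward() unpacks
    def forward(self, srcseq):
        enc = srcseq                  # pretend the input is already encoded
        final = enc[:, -1, :]         # last timestep as the summary state
        return final, enc

class ToyDecoder(nn.Module):
    def set_init_states(self, *states):    # called by EncDec before decoding
        self.init_states = states
    def forward(self, tgtseq, ctx):        # gets targets plus encoder outputs
        return tgtseq

encdec = EncDec(ToyEncoder(), ToyDecoder(), mode="fast")
out = encdec(torch.zeros(2, 5, 8), torch.zeros(2, 7, 8))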
Example 4
def main(
        # Hyper Parameters
        sequence_length=28,
        input_size=28,
        hidden_size=128,
        num_layers=2,
        num_classes=10,
        batch_size=100,
        num_epochs=2,
        learning_rate=0.01,
        gpu=False,
        mode="qrnn"  # "nn" or "qrnn" or "stack"
):

    tt = ticktock("script")
    tt.msg("using q: {}".format(mode))
    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

    test_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # RNN Model (Many-to-One)
    class RNN(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, num_classes):
            super(RNN, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            if mode == "qrnn":
                tt.msg("using q.RNN")
                self.rnn = RecStack(*[GRUCell(input_size, hidden_size, use_cudnn_cell=False, rec_batch_norm="main")] +
                                     [GRUCell(hidden_size, hidden_size) for i in range(num_layers - 1)])\
                            .to_layer().return_all()
            elif mode == "nn":
                tt.msg("using nn.RNN")
                self.rnn = nn.GRU(input_size,
                                  hidden_size,
                                  num_layers,
                                  batch_first=True)
            elif mode == "stack":
                self.rnn = q.RecurrentStack(
                    *([q.GRULayer(input_size, hidden_size)] + [
                        q.GRULayer(hidden_size, hidden_size)
                        for i in range(num_layers - 1)
                    ]))
            self.fc = nn.Linear(hidden_size, num_classes)

        def forward(self, x):
            # Set initial states
            h0 = q.var(
                torch.zeros(self.num_layers, x.size(0),
                            self.hidden_size)).cuda(crit=x).v
            #c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))

            # Forward propagate RNN
            if mode == "qrnn" or mode == "stack":
                out = self.rnn(x)
            else:
                out, _ = self.rnn(x, h0)

            # Decode hidden state of last time step
            out = nn.LogSoftmax()(self.fc(out[:, -1, :]))
            return out

    if gpu:
        q.var.all_cuda = True
    rnn = RNN(input_size, hidden_size, num_layers, num_classes)
    if gpu:
        rnn.cuda()

    # Loss and Optimizer
    criterion = q.lossarray(nn.NLLLoss())
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
    q.train(rnn).train_on(train_loader, criterion).cuda(gpu)\
        .optimizer(optimizer).set_batch_transformer(lambda x, y: (x.view(-1, sequence_length, input_size), y))\
        .train(num_epochs)
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length,
                                   input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        outputs = rnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Test Accuracy of the model on the 10000 test images: %d %%' %
          (100 * correct / total))

    # Save the Model
    torch.save(rnn.state_dict(), 'rnn.pkl')
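
The set_batch_transformer lambda above performs the same reshape the test loop does by hand: each 1x28x28 MNIST image is reinterpreted as a 28-step sequence of 28-pixel rows. In isolation:

x = torch.zeros(100, 1, 28, 28)   # one DataLoader batch of MNIST images
seq = x.view(-1, 28, 28)          # (batch, sequence_length=28, input_size=28)
# each image row becomes one RNN timestep; the classifier reads the last step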
Example 5
    def trainloop(self):
        stop = False
        self.tt.tick("training")
        tt = ticktock("-")
        current_epoch = 0
        totaltrainbats = len(self.traindataloader)
        while not stop:
            self.current_epoch = current_epoch
            stop = self.current_epoch + 1 == self.epochs
            self.trainlosses.push_and_reset()
            tt.tick()
            self.model.train()
            for i, batch in enumerate(self.traindataloader):
                self.optim.zero_grad()
                batch = [
                    q.var(batch_e).cuda(self.usecuda).v for batch_e in batch
                ]
                if self.transform_batch is not None:
                    batch = self.transform_batch(*batch)
                modelouts = self.model(*batch[:-1])
                if not issequence(modelouts):
                    modelouts = [modelouts]
                trainlosses = self.trainlosses(modelouts[0], batch[-1])
                trainlosses[0].backward()
                # total gradient norm: clip_grad_norm reports it when clipping;
                # otherwise compute the global L2 norm over all gradients by hand
                if self._clip_grad_norm is not None:
                    tgn = nn.utils.clip_grad_norm(self.model.parameters(),
                                                  self._clip_grad_norm)
                else:
                    tgn = 0
                    for param in self.model.parameters():
                        if param.grad is not None:
                            tgn = tgn + param.grad.pow(2).sum()
                    tgn = tgn.pow(1. / 2)
                    tgn = tgn.data[0]

                self.optim.step()

                tt.live(
                    "train - Epoch {}/{} - [{}/{}]: {} - TGN: {:.4f}".format(
                        self.current_epoch + 1, self.epochs, i + 1,
                        totaltrainbats, self.trainlosses.pp(), tgn))
            ttmsg = "Epoch {}/{} -- train: {}"\
                .format(
                    self.current_epoch+1,
                    self.epochs,
                    self.trainlosses.pp()
                )
            train_epoch_losses = self.trainlosses.get_agg_errors()
            valid_epoch_losses = []
            if self.validlosses is not None:
                self.model.eval()
                self.validlosses.push_and_reset()
                totalvalidbats = len(self.validdataloader)
                for i, batch in enumerate(self.validdataloader):
                    batch = [
                        q.var(batch_e).cuda(self.usecuda).v
                        for batch_e in batch
                    ]
                    if self.transform_batch is not None:
                        batch = self.transform_batch(*batch)
                    modelouts = self.model(*batch[:-1])
                    if not issequence(modelouts):
                        modelouts = [modelouts]
                    validlosses = self.validlosses(modelouts[0], batch[-1])
                    tt.live("valid - Epoch {}/{} - [{}/{}]: {}".format(
                        self.current_epoch + 1, self.epochs, i + 1,
                        totalvalidbats, self.validlosses.pp()))
                ttmsg += " -- valid: {}".format(self.validlosses.pp())
                valid_epoch_losses = self.validlosses.get_agg_errors()
            tt.stoplive()
            tt.tock(ttmsg)
            if self._earlystop:
                doearlystop = self.earlystop_eval(train_epoch_losses,
                                                  valid_epoch_losses)
                if doearlystop:
                    tt.msg("stopping early")
                stop = stop or doearlystop
            current_epoch += 1
        self.tt.tock("trained")
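
When no clip value is configured, the loop computes the global gradient L2 norm itself purely for reporting. The fallback branch as a standalone helper (plain PyTorch; the name is ours):

def grad_total_norm(parameters):
    # sqrt of the summed squared entries of every available gradient
    total = 0.0
    for p in parameters:
        if p.grad is not None:
            total += float(p.grad.pow(2).sum())
    return total ** 0.5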
Example 6
def main(
    # Hyper Parameters
    sequence_length=28,
    input_size=28,
    hidden_size=128,
    num_layers=2,
    batch_size=5,
    num_epochs=2,
    learning_rate=0.01,
    ctx_to_decinp=False,
    gpu=False,
    mode="stack",  # "nn" or "qrnn" or "stack"
    trivial=False,
):
    tt.msg("using q: {}".format(mode))
    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

    test_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    if gpu:
        q.var.all_cuda = True
    encoder = Encoder(input_size, hidden_size, num_layers, mode=mode)
    embdim = hidden_size
    decdim = 100
    initstate = nn.Linear(hidden_size, decdim)
    decoder = q.ContextDecoder(
        nn.Embedding(256, embdim),
        q.RecurrentStack(
            q.GRULayer(embdim + hidden_size if ctx_to_decinp else embdim,
                       decdim),
            nn.Linear(decdim, 256),
            nn.LogSoftmax()),
        ctx_to_h0=initstate,
        ctx_to_decinp=ctx_to_decinp)

    if trivial:
        encdec = IdxToSeq(decoder, embdim=hidden_size)
    else:
        encdec = ImgToSeq(encoder, decoder)
    if gpu:
        encdec.cuda()

    # Loss and Optimizer
    criterion = q.SeqNLLLoss(ignore_index=0)
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adadelta(encdec.parameters(), lr=learning_rate)

    tt.msg("training")
    # Train the Model
    for epoch in range(num_epochs):
        tt.tick()
        btt = ticktock("batch")
        btt.tick()
        for i, (images, labels) in enumerate(train_loader):
            #btt.tick("doing batch")
            images = q.var(images.view(-1, sequence_length,
                                       input_size)).cuda(crit=gpu).v
            labels = q.var(labels).cuda(crit=gpu).v
            tgt = number2charseq(labels)
            if trivial:
                images = labels

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = encdec(images, tgt[:, :-1])
            loss = criterion(outputs, tgt[:, 1:])
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                btt.tock("100 batches done")
                tgn = 0
                # note: this sums per-parameter L2 norms, which upper-bounds
                # the true global norm (sqrt of summed squares)
                for param in encdec.parameters():
                    tgn = tgn + torch.norm(param.grad, 2)
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, TGN: %.8f' %
                      (epoch + 1, num_epochs, i + 1, len(train_dataset) //
                       batch_size, loss.data[0], tgn.cpu().data.numpy()[0]))
                btt.tick()
            #tt.tock("batch done")
        tt.tock("epoch {} done {}".format(epoch, loss.data[0]))
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length,
                                   input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        if trivial:
            images = labels
        tgt = number2charseq(labels)
        outputs = encdec(images, tgt[:, :-1])
        _, predicted = torch.max(outputs.data, 2)
        if tgt.is_cuda:
            tgt = tgt.cpu()
        if predicted.is_cuda:
            predicted = predicted.cpu()
        tgt = tgt[:, 1:].data.numpy()
        predicted = predicted.numpy()

        # a sequence counts as correct if every non-padding position matches;
        # padded target positions (index 0) are ignored
        tgtmask = tgt == 0
        eq = predicted == tgt
        eq = eq | tgtmask
        eq = np.all(eq, axis=1)
        correct += eq.sum()
        total += labels.size(0)

    print('Test Accuracy of the model on the 10000 test images: %d %%' %
          (100. * correct / total))

    # Save the Model
    torch.save(encdec.state_dict(), 'rnn.pkl')
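
number2charseq is used above but never defined in the snippet. Given the 256-entry embedding, the ignore_index=0 padding, and the tgt[:, :-1] / tgt[:, 1:] teacher forcing, it plausibly maps each integer label to a zero-padded sequence of character byte codes; the sketch below is a guess at that contract, not the actual helper:

def number2charseq_sketch(labels, maxlen=4):
    # hypothetical: slot 0 is left as a start/padding position, then the byte
    # codes of str(label); 0 doubles as the padding id SeqNLLLoss ignores
    out = np.zeros((len(labels), maxlen), dtype="int64")
    for k, n in enumerate(labels):
        for j, ch in enumerate(str(int(n))[:maxlen - 1]):
            out[k, j + 1] = ord(ch)
    return torch.from_numpy(out)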