# assumes module-level imports of numpy as np, pickle as pkl, OrderedDict and ticktock
def loadvalue(self, path, dim, indim=None, maskid=True, rareid=True):
    # TODO: nonstandard mask and rareid?
    tt = ticktock(self.__class__.__name__)
    tt.tick()
    # load weights (from cache if this path was loaded before)
    if path not in self.loadcache:
        W = np.load(path + ".npy")
    else:
        W = self.loadcache[path][0]
    # load words
    if path not in self.loadcache:
        with open(path + ".words", "rb") as wordsfile:
            words = pkl.load(wordsfile)
    else:
        words = self.loadcache[path][1]
    # cache
    if self.useloadcache:
        self.loadcache[path] = (W, words)
    # adapt: truncate vocabulary, then prepend zero rows for rare and mask tokens
    if indim is not None:
        W = W[:indim, :]
    if rareid:
        W = np.concatenate([np.zeros_like(W[0, :])[np.newaxis, :], W], axis=0)
    if maskid:
        W = np.concatenate([np.zeros_like(W[0, :])[np.newaxis, :], W], axis=0)
    tt.tock("vectors loaded")
    tt.tick()
    # dictionary: indices must line up with the rows prepended to W above,
    # so the same flags (not "is not None") decide whether the special tokens get ids
    D = OrderedDict()
    i = 0
    if maskid:
        D[self.masktoken] = i
        i += 1
    if rareid:
        D[self.raretoken] = i
        i += 1
    wordset = set(words)
    for j, word in enumerate(words):
        if indim is not None and j >= indim:
            break
        if word.lower() not in wordset and self.trylowercase:
            word = word.lower()
        D[word] = i
        i += 1
    tt.tock("dictionary created")
    return W, D
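# Usage sketch (not part of the original class): builds an nn.Embedding from the (W, D)
# pair returned by loadvalue. The loader instance and the file prefix are whatever the
# caller has; with both flags on, index 0 is the mask token and index 1 the rare token.
def embedding_from_pretrained(loader, path, dim):
    import torch                       # local imports so this sketch stands alone
    from torch import nn
    W, D = loader.loadvalue(path, dim, indim=None, maskid=True, rareid=True)
    emb = nn.Embedding(W.shape[0], W.shape[1], padding_idx=D[loader.masktoken])
    emb.weight.data.copy_(torch.from_numpy(W))     # copy pretrained vectors into the layer
    return emb, D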
def __init__(self, model):
    super(train, self).__init__()
    self.model = model
    self.epochs = None
    self.current_epoch = 0
    self.trainlosses = None
    self.validlosses = None
    self.usecuda = False
    self.cudaargs = ([], {})
    self.optim = None
    self.transform_batch = None
    self.traindataloader = None
    self.validdataloader = None
    self.tt = ticktock("trainer")
    # long API
    self._clip_grad_norm = None
    # early stopping
    self._earlystop = False
    self._earlystop_criterium = None
    self._earlystop_selector = None
    self._earlystop_select_history = None
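# Sketch of how these slots get filled (an illustration, not part of this class): the
# trainer is configured through the chaining calls used in the MNIST script further down;
# `model`, `train_loader` and `losses` stand for the caller's own objects, and each call
# returning the trainer is implied by the chained usage in that script.
#
#   losses = q.lossarray(nn.NLLLoss())
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   trainer = q.train(model).train_on(train_loader, losses)    # fills traindataloader, trainlosses
#   trainer = trainer.cuda(True).optimizer(optimizer)          # fills usecuda, optim
#   trainer.set_batch_transformer(lambda x, y: (x, y))         # fills transform_batch
#   trainer.train(10)                                          # sets epochs and runs the training loop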
from __future__ import print_function

import torch
from torch import nn
import qelos as q
import numpy as np
from qelos.util import argprun, ticktock
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader
from IPython import embed
from matplotlib import pyplot as plt
import seaborn as sns

tt = ticktock("script")


class EncDec(nn.Module):
    def __init__(self, encoder, decoder, mode="fast"):
        super(EncDec, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.mode = mode

    def forward(self, srcseq, tgtseq):
        # encoder returns (final state, per-step outputs)
        final, enc = self.encoder(srcseq)
        if self.mode == "fast":
            self.decoder.set_init_states(final)
        else:
            self.decoder.set_init_states(final, final)
        dec = self.decoder(tgtseq, enc)
        return dec
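# Sketch (not from the original script) of the interface EncDec expects: the encoder
# returns (final_state, all_step_outputs) and the decoder exposes set_init_states() and
# consumes (tgtseq, ctx). The toy modules and sizes below are illustrative only; the
# decoder returns unnormalized scores, to be followed by a log-softmax / NLL pairing as
# in the scripts below.
class ToyEncoder(nn.Module):
    def __init__(self, vocsize=100, dim=32):
        super(ToyEncoder, self).__init__()
        self.emb = nn.Embedding(vocsize, dim)
        self.gru = nn.GRU(dim, dim, batch_first=True)

    def forward(self, srcseq):                    # srcseq: (batch, seqlen) of token ids
        enc, final = self.gru(self.emb(srcseq))
        return final[-1], enc                     # (batch, dim), (batch, seqlen, dim)


class ToyDecoder(nn.Module):
    def __init__(self, vocsize=100, dim=32):
        super(ToyDecoder, self).__init__()
        self.emb = nn.Embedding(vocsize, dim)
        self.gru = nn.GRU(dim, dim, batch_first=True)
        self.out = nn.Linear(dim, vocsize)
        self.h0 = None

    def set_init_states(self, *states):           # "fast" mode passes one state, otherwise two
        self.h0 = states[0].unsqueeze(0)          # -> (1, batch, dim) as nn.GRU expects

    def forward(self, tgtseq, ctx):               # ctx (encoder outputs) unused in this toy
        dec, _ = self.gru(self.emb(tgtseq), self.h0)
        return self.out(dec)                      # (batch, seqlen, vocsize) scores

# e.g.: EncDec(ToyEncoder(), ToyDecoder(), mode="fast")(src_ids, tgt_ids[:, :-1])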
# NOTE: this script relies on module-level imports not shown in this excerpt
# (torchvision's datasets as dsets and transforms, plus RecStack / GRUCell from qelos).
def main(
        # Hyper Parameters
        sequence_length=28,
        input_size=28,
        hidden_size=128,
        num_layers=2,
        num_classes=10,
        batch_size=100,
        num_epochs=2,
        learning_rate=0.01,
        gpu=False,
        mode="qrnn",        # "nn" or "qrnn" or "stack"
        ):
    tt = ticktock("script")
    tt.msg("using q: {}".format(mode))

    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)
    test_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # RNN Model (Many-to-One): each MNIST image is read as a sequence of 28 rows
    class RNN(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, num_classes):
            super(RNN, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            if mode == "qrnn":
                tt.msg("using q.RNN")
                self.rnn = RecStack(
                    *([GRUCell(input_size, hidden_size, use_cudnn_cell=False, rec_batch_norm="main")]
                      + [GRUCell(hidden_size, hidden_size) for i in range(num_layers - 1)]))\
                    .to_layer().return_all()
            elif mode == "nn":
                tt.msg("using nn.RNN")
                self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
            elif mode == "stack":
                self.rnn = q.RecurrentStack(
                    *([q.GRULayer(input_size, hidden_size)]
                      + [q.GRULayer(hidden_size, hidden_size) for i in range(num_layers - 1)]))
            self.fc = nn.Linear(hidden_size, num_classes)

        def forward(self, x):
            # Set initial states
            h0 = q.var(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).cuda(crit=x).v
            #c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))

            # Forward propagate RNN
            if mode == "qrnn" or mode == "stack":
                out = self.rnn(x)
            else:
                out, _ = self.rnn(x, h0)

            # Decode hidden state of last time step
            out = nn.LogSoftmax()(self.fc(out[:, -1, :]))
            return out

    if gpu:
        q.var.all_cuda = True
    rnn = RNN(input_size, hidden_size, num_layers, num_classes)
    if gpu:
        rnn.cuda()

    # Loss and Optimizer
    criterion = q.lossarray(nn.NLLLoss())
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    # Train the Model through the qelos trainer API
    q.train(rnn).train_on(train_loader, criterion).cuda(gpu)\
        .optimizer(optimizer)\
        .set_batch_transformer(lambda x, y: (x.view(-1, sequence_length, input_size), y))\
        .train(num_epochs)

    # manual training loop below was superseded by q.train(...) above; kept commented out
    # tt.msg("training")
    # for epoch in range(num_epochs):
    #     tt.tick()
    #     btt = ticktock("batch")
    #     btt.tick()
    #     for i, (images, labels) in enumerate(train_loader):
    #         #btt.tick("doing batch")
    #         images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
    #         labels = q.var(labels).cuda(crit=gpu).v
    #
    #         # Forward + Backward + Optimize
    #         optimizer.zero_grad()
    #         outputs = rnn(images)
    #         loss = criterion(outputs, labels)
    #         loss.backward()
    #         optimizer.step()
    #
    #         if (i + 1) % 100 == 0:
    #             btt.tock("100 batches done")
    #             print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
    #                   % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.data[0]))
    #             btt.tick()
    #         #tt.tock("batch done")
    #     tt.tock("epoch {} done".format(epoch))

    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        outputs = rnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

    # Save the Model
    torch.save(rnn.state_dict(), 'rnn.pkl')
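# Entry-point sketch (an assumption, the excerpt does not show how the script is launched):
# qelos scripts typically expose main()'s keyword arguments on the command line through
# argprun from qelos.util, so hyperparameters such as num_epochs or gpu can be overridden
# without editing the file.
#
# if __name__ == "__main__":
#     from qelos.util import argprun
#     argprun(main)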
def trainloop(self):
    stop = False
    self.tt.tick("training")
    tt = ticktock("-")
    current_epoch = 0
    totaltrainbats = len(self.traindataloader)
    while not stop:
        self.current_epoch = current_epoch
        stop = self.current_epoch + 1 == self.epochs
        self.trainlosses.push_and_reset()
        tt.tick()
        self.model.train()
        for i, batch in enumerate(self.traindataloader):
            self.optim.zero_grad()
            batch = [q.var(batch_e).cuda(self.usecuda).v for batch_e in batch]
            if self.transform_batch is not None:
                batch = self.transform_batch(*batch)
            # last batch element is the gold output, the rest are model inputs
            modelouts = self.model(*batch[:-1])
            if not issequence(modelouts):
                modelouts = [modelouts]
            trainlosses = self.trainlosses(modelouts[0], batch[-1])
            trainlosses[0].backward()
            # total gradient norm: reuse the norm returned by clipping if enabled,
            # otherwise accumulate the L2 norm over all parameter gradients
            tgn0 = None
            if self._clip_grad_norm is not None:
                tgn0 = nn.utils.clip_grad_norm(self.model.parameters(), self._clip_grad_norm)
            if tgn0 is not None:
                tgn = tgn0
            else:
                tgn = 0
                for param in self.model.parameters():
                    tgn += param.grad.pow(2).sum() if param.grad is not None else 0
                tgn = tgn.pow(1. / 2)
                tgn = tgn.data[0]
            self.optim.step()
            tt.live("train - Epoch {}/{} - [{}/{}]: {} - TGN: {:.4f}".format(
                self.current_epoch + 1, self.epochs, i + 1, totaltrainbats,
                self.trainlosses.pp(), tgn))
        ttmsg = "Epoch {}/{} -- train: {}".format(
            self.current_epoch + 1, self.epochs, self.trainlosses.pp())
        train_epoch_losses = self.trainlosses.get_agg_errors()
        valid_epoch_losses = []
        if self.validlosses is not None:
            self.model.eval()
            self.validlosses.push_and_reset()
            totalvalidbats = len(self.validdataloader)
            for i, batch in enumerate(self.validdataloader):
                batch = [q.var(batch_e).cuda(self.usecuda).v for batch_e in batch]
                if self.transform_batch is not None:
                    batch = self.transform_batch(*batch)
                modelouts = self.model(*batch[:-1])
                if not issequence(modelouts):
                    modelouts = [modelouts]
                validlosses = self.validlosses(modelouts[0], batch[-1])
                tt.live("valid - Epoch {}/{} - [{}/{}]: {}".format(
                    self.current_epoch + 1, self.epochs, i + 1, totalvalidbats,
                    self.validlosses.pp()))
            ttmsg += " -- valid: {}".format(self.validlosses.pp())
            valid_epoch_losses = self.validlosses.get_agg_errors()
        tt.stoplive()
        tt.tock(ttmsg)
        if self._earlystop:
            doearlystop = self.earlystop_eval(train_epoch_losses, valid_epoch_losses)
            if doearlystop:
                tt.msg("stopping early")
            stop = stop or doearlystop
        current_epoch += 1
    self.tt.tock("trained")
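# Helper sketch (not part of the original class): the same total-gradient-norm computation
# as in the loop above, factored out -- it reuses the value returned by clip_grad_norm when
# clipping is enabled (which also clips in place), otherwise it accumulates the L2 norm over
# all parameter gradients.
def total_grad_norm(model, clip=None):
    if clip is not None:
        return nn.utils.clip_grad_norm(model.parameters(), clip)   # returns the pre-clip norm
    total = 0.
    for param in model.parameters():
        if param.grad is not None:
            total += param.grad.data.norm(2) ** 2
    return total ** 0.5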
# NOTE: Encoder, ImgToSeq, IdxToSeq and number2charseq are defined elsewhere in this
# script and are not shown in this excerpt.
def main(
        # Hyper Parameters
        sequence_length=28,
        input_size=28,
        hidden_size=128,
        num_layers=2,
        batch_size=5,
        num_epochs=2,
        learning_rate=0.01,
        ctx_to_decinp=False,
        gpu=False,
        mode="stack",       # "nn" or "qrnn" or "stack"
        trivial=False,
        ):
    tt.msg("using q: {}".format(mode))

    # MNIST Dataset
    train_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)
    test_dataset = dsets.MNIST(root='../../../datasets/mnist/',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    if gpu:
        q.var.all_cuda = True

    # encoder reads MNIST rows as a sequence; decoder generates the label as a character sequence
    encoder = Encoder(input_size, hidden_size, num_layers, mode=mode)
    embdim = hidden_size
    decdim = 100
    initstate = nn.Linear(hidden_size, decdim)
    decoder = q.ContextDecoder(*[
        nn.Embedding(256, embdim),
        q.RecurrentStack(
            q.GRULayer((embdim + hidden_size if ctx_to_decinp else embdim), decdim),
            nn.Linear(decdim, 256),
            nn.LogSoftmax())
    ], ctx_to_h0=initstate, ctx_to_decinp=ctx_to_decinp)

    if trivial:
        encdec = IdxToSeq(decoder, embdim=hidden_size)
    else:
        encdec = ImgToSeq(encoder, decoder)
    if gpu:
        encdec.cuda()

    # Loss and Optimizer (sequence NLL, ignoring padding index 0)
    criterion = q.SeqNLLLoss(ignore_index=0)
    if gpu:
        criterion.cuda()
    optimizer = torch.optim.Adadelta(encdec.parameters(), lr=learning_rate)

    tt.msg("training")
    # Train the Model
    for epoch in range(num_epochs):
        tt.tick()
        btt = ticktock("batch")
        btt.tick()
        for i, (images, labels) in enumerate(train_loader):
            #btt.tick("doing batch")
            images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
            labels = q.var(labels).cuda(crit=gpu).v
            tgt = number2charseq(labels)
            if trivial:
                images = labels

            # Forward + Backward + Optimize (teacher forcing: feed tgt[:, :-1], predict tgt[:, 1:])
            optimizer.zero_grad()
            outputs = encdec(images, tgt[:, :-1])
            loss = criterion(outputs, tgt[:, 1:])
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                btt.tock("100 batches done")
                tgn = 0
                for param in encdec.parameters():
                    tgn = tgn + torch.norm(param.grad, 2)
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, TGN: %.8f'
                      % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size,
                         loss.data[0], tgn.cpu().data.numpy()[0]))
                btt.tick()
            #tt.tock("batch done")
        tt.tock("epoch {} done {}".format(epoch, loss.data[0]))

    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = q.var(images.view(-1, sequence_length, input_size)).cuda(crit=gpu).v
        labels = q.var(labels).cuda(crit=gpu).v
        if trivial:
            images = labels
        tgt = number2charseq(labels)
        outputs = encdec(images, tgt[:, :-1])
        _, predicted = torch.max(outputs.data, 2)
        if tgt.is_cuda:
            tgt = tgt.cpu()
        if predicted.is_cuda:
            predicted = predicted.cpu()
        tgt = tgt[:, 1:].data.numpy()
        predicted = predicted.numpy()
        # print(predicted[:10])
        # print(tgt[:10])
        # print(labels[:10])
        # a prediction counts as correct only if it matches the target everywhere outside the padding (index 0)
        tgtmask = tgt == 0
        eq = predicted == tgt
        eq = eq | tgtmask
        eq = np.all(eq, axis=1)
        correct += eq.sum()
        total += labels.size(0)

    print('Test Accuracy of the model on the 10000 test images: %d %%' % (100. * correct / total))

    # Save the Model
    torch.save(encdec.state_dict(), 'rnn.pkl')
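# Sketch of what number2charseq might look like (the real helper is not part of this
# excerpt, and the marker values here are guesses): each integer label becomes the byte
# values of its decimal string with a start marker and 0-padding, so that the
# tgt[:, :-1] / tgt[:, 1:] teacher forcing and ignore_index=0 in q.SeqNLLLoss line up
# with the usage above. Assumes the surrounding script's numpy / torch / qelos imports.
def number2charseq_sketch(labels, maxlen=5, start=1):
    labels_np = labels.cpu().data.numpy() if hasattr(labels, "data") else np.asarray(labels)
    out = np.zeros((len(labels_np), maxlen), dtype="int64")
    out[:, 0] = start                                # hypothetical start-of-sequence marker
    for k, num in enumerate(labels_np):
        for j, ch in enumerate(str(num)):
            out[k, j + 1] = ord(ch)                  # character index into the 256-way embedding
    return q.var(torch.from_numpy(out)).cuda(crit=labels).v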