import numpy as np
import theano
import theano.tensor as T
import lasagne
from theano import function
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from lasagne.objectives import categorical_crossentropy
from lasagne.utils import one_hot


def create_theano_loss(d):
    # Expects num_classes, N, batch_size, lr and beta to be defined at module level.
    X, t = T.dmatrix('X'), T.dvector('t')
    log_sigma2 = theano.shared(np.ones((num_classes, d)))
    theta = theano.shared(np.random.randn(num_classes, d))

    # Change of parametrization: log alpha = log sigma^2 - log theta^2
    log_alpha = log_sigma2 - T.log(theta ** 2)
    la, alpha = log_alpha, T.exp(log_alpha)

    # -KL(q || prior), using the approximation from Sparse Variational Dropout
    mD_KL = -(0.5 * T.log1p(T.exp(-la))
              - (0.03 + 1.0 / (1.0 + T.exp(-(1.5 * (la + 1.3)))) * 0.64)).sum()

    # Expected log-likelihood through the Local Reparametrization Trick:
    # sample the pre-activations instead of the weights.
    srng = RandomStreams()
    mu = T.dot(X, theta.T)
    si = T.sqrt(T.dot(X * X, (alpha * theta * theta).T))
    activation = mu + srng.normal(mu.shape, avg=0, std=1) * si
    predictions = T.nnet.softmax(activation)
    ell = -T.sum(categorical_crossentropy(predictions, one_hot(t, num_classes)))

    # Objective: negative SGVLB (stochastic gradient variational lower bound)
    nlb = -(N / batch_size * ell + mD_KL)

    # Optimization method and function compiling
    opt = lasagne.updates.adam(nlb, [log_sigma2, theta],
                               learning_rate=lr, beta1=beta)
    lbf = function([X, t], nlb, updates=opt)

    return lbf, theta, log_sigma2
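# Illustrative usage only (not part of the original script): drive the SGVLB
# objective returned by create_theano_loss and track the sparsity induced by
# variational dropout. It assumes the same module-level hyper-parameters that
# create_theano_loss reads (num_classes, N, batch_size, lr, beta), and that
# X_train is an (N, d) float64 array with integer labels y_train of shape (N,).
def example_train_sparse_vd(X_train, y_train, num_epochs=10):
    lbf, theta, log_sigma2 = create_theano_loss(X_train.shape[1])
    for epoch in range(num_epochs):
        perm = np.random.permutation(len(X_train))
        for start in range(0, len(X_train), batch_size):
            idx = perm[start:start + batch_size]
            loss = lbf(X_train[idx], y_train[idx].astype('float64'))
        # log alpha = log sigma^2 - log theta^2; weights with log alpha > 3
        # contribute almost pure noise and are effectively pruned.
        log_alpha = log_sigma2.get_value() - np.log(theta.get_value() ** 2)
        sparsity = 100.0 * np.mean(log_alpha > 3)
        print('epoch %d: bound %.3f, sparsity %.1f%%'
              % (epoch + 1, float(loss), sparsity))
    return theta, log_sigma2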
def test_one_hot():
    from lasagne.utils import one_hot
    a = np.random.randint(0, 10, 20)
    b = np.zeros((a.size, a.max() + 1))
    b[np.arange(a.size), a] = 1
    result = one_hot(a).eval()
    assert (result == b).all()
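# Complementary check (a sketch, not from the original test suite): when the
# number of classes m is passed explicitly, one_hot should keep zero columns
# for classes that never occur in the label vector.
def test_one_hot_fixed_width():
    from lasagne.utils import one_hot
    a = np.array([0, 2, 1])
    expected = np.zeros((3, 5))
    expected[np.arange(3), a] = 1
    result = one_hot(a, 5).eval()
    assert result.shape == (3, 5)
    assert (result == expected).all()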
def test_exec_(self, set='test'):
    """Run the compiled 'val' iteration function over the data loader and
    report the mean loss and accuracy for the given split."""
    assert set in ['val', 'test']
    if self.num_classes_ > 2:
        n = self.num_classes_
    else:
        n = 1
    batches = self.train_loader.batch_gen(self.batch_sz, shuffle=False,
                                          async=False)
    test_loss = 0
    test_acc = 0
    test_batches = 0
    for batch in batches:
        x, y, paths = batch
        if len(paths) < x.shape[0]:  # non-full batch
            x = x[:len(paths)]
            y = y[:len(paths)]
        x = np.asarray(x, dtype=theano.config.floatX)
        y = np.asarray(one_hot(y).eval(), dtype=theano.config.floatX)
        t0 = time.time()
        retval = self.iter_funcs_['val'](x, y)
        loss, acc = retval
        test_loss += loss
        test_acc += acc
        test_batches += 1
    test_loss = test_loss / test_batches
    test_acc = test_acc / test_batches * 100.0
    log.info('%s loss: %f, accuracy %f %%' % (set, test_loss, test_acc))
    return test_loss
def train(self):
    """Main training loop: iterate over mini-batches for train_epochs epochs,
    running validation and saving snapshots at the configured frequencies."""
    epoch = 0
    i = 0
    while epoch < self.train_epochs:
        epoch += 1
        log.info('Epoch %d / %d' % (epoch, self.train_epochs))
        # get data
        batches = self.train_loader.batch_gen(self.batch_sz,
                                              shuffle=self.shuffle_train,
                                              async=False)
        # iterate
        t0_bl = time.time()
        for batch in batches:
            time_bl = time.time() - t0_bl
            x, y, paths = batch
            if len(paths) < x.shape[0]:  # non-full batch
                x = x[:len(paths)]
                y = y[:len(paths)]
            if i % self.val_freq == 0:
                # VALIDATION -----------
                log.info('Calculating validation loss...')
                val_loss = self.val()
                if self.snapshot_freq > 0 and i % self.snapshot_freq == 0:
                    snapshot_file = ('snapshot_iter%d_%d_%d.pkl'
                                     % (i, epoch, val_loss))
                    snapshot_path = os.path.join(self.snapshot_dir,
                                                 snapshot_file)
                    log.info('Saving snapshot %s...' % snapshot_file)
                    self.save_weights(snapshot_path)
            # TRAIN -----------
            x = np.asarray(x, dtype=theano.config.floatX)
            y = np.asarray(one_hot(y).eval(), dtype=theano.config.floatX)
            t0 = time.time()
            loss, output, mat, grad = self.iter_funcs_['train'](x, y)
            if i % self.val_freq == 0:
                log.info('Iteration %d, train loss: %f \t [%.2f (%.2f) s]'
                         % (i, loss, time.time() - t0, time_bl))
            i += 1
            t0_bl = time.time()
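# A small numpy alternative to lasagne.utils.one_hot (a sketch, not part of the
# original trainer): train() and test_exec_() above call one_hot(y).eval(),
# which builds and evaluates a Theano graph for every batch; encoding labels
# directly in numpy avoids that per-batch overhead.
def one_hot_numpy(labels, num_classes=None):
    labels = np.asarray(labels, dtype='int64')
    if num_classes is None:
        num_classes = labels.max() + 1
    encoded = np.zeros((labels.size, num_classes), dtype=theano.config.floatX)
    encoded[np.arange(labels.size), labels] = 1
    return encoded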
from lasagne.layers import get_output, InputLayer
from lasagne.utils import one_hot
# Assumed sources for the helpers used below (not shown in the original snippet)
from lasagne.objectives import aggregate, categorical_crossentropy
import numpy as np
import theano.tensor as T
import nolearn
from nolearn.lasagne import NeuralNet, BatchIterator, TrainSplit, objective
# Custom output class
from NNModel_theano import NNModel_theano
# Libraries for data processing
from sys import stdout
from collections import OrderedDict
import time

# Functions for the modifiedObjective method
oneHot = lambda pred, label: one_hot(label, np.array(pred).shape[-1])
lossFn = lambda pred, label: categorical_crossentropy(pred, oneHot(pred, label))


def categorical_crossentropy_logdomain(log_predictions, targets):
    # Cross-entropy for predictions that are already log-probabilities,
    # e.g. the output of a log-softmax layer.
    return -T.sum(targets * log_predictions, axis=1)


# Custom objective function capable of adding total variation regularization.
def modifiedObjective(layers, loss_function, target, aggregate=aggregate,
                      deterministic=False, l1=0,