# imports assumed by the snippets below (this section is extracted from a
# larger module; `plotting` is a local helper module providing `filters`
# and `compare`)
import collections

import numpy as np
import matplotlib.pyplot as plt

import theano
import theano.tensor as tt


def pretrain(self, batches, test_images, n_epochs=10, **train_params):
    data = tt.matrix('data', dtype=self.dtype)
    cost, updates = self.get_cost_updates(data, **train_params)
    train_rbm = theano.function([data], cost, updates=updates)

    for epoch in range(n_epochs):
        # train on each mini-batch
        costs = []
        for batch in batches:
            costs.append(train_rbm(batch))
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        # plot reconstructions on test set
        plt.figure(2)
        plt.clf()
        x = test_images
        y = self.encode(test_images)  # was `rbm.encode`; `rbm` is undefined here
        z = self.decode(y)
        plotting.compare([x.reshape(-1, 28, 28), z.reshape(-1, 28, 28)],
                         rows=5, cols=20,
                         vlims=(-1, 2) if GAUSSIAN else (0, 1))
        plt.draw()
        print "Test error:", rms(x - z, axis=1).mean()

        # plot filters for first layer only
        plt.figure(3)
        plt.clf()
        plotting.filters(self.filters, rows=10, cols=20)
        plt.draw()

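# `rms` is used for the test error above but is not defined in this snippet.
# A minimal NumPy sketch, assuming it mirrors the Theano expression
# tt.sqrt(tt.mean((x - z)**2, axis=1)) used for the training error:
def rms(x, axis=None):
    """Root-mean-square of `x` along `axis`."""
    return np.sqrt(np.mean(x**2, axis=axis))
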
def auto_sgd(self, images, test_images=None, batch_size=100,
             rate=0.1, n_epochs=10):
    dtype = theano.config.floatX

    params = []
    for auto in self.autos:
        params.extend((auto.W, auto.c, auto.b))

    # --- compute backprop function
    x = tt.matrix('images')
    xn = x + self.theano_rng.normal(size=x.shape, std=1, dtype=dtype)

    # compute coding error
    y = self.propup(xn)
    z = self.propdown(y)
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    error = tt.mean(rmses)

    # compute gradients
    grads = tt.grad(error, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    for auto in self.autos:
        if auto.mask is not None:
            updates[auto.W] = updates[auto.W] * auto.mask

    train_dbn = theano.function([x], error, updates=updates)
    reconstruct = self.reconstruct

    # --- perform SGD
    batches = images.reshape(-1, batch_size, images.shape[1])
    assert np.isfinite(batches).all()

    for epoch in range(n_epochs):
        costs = []
        for batch in batches:
            costs.append(train_dbn(batch))
        # self.check_params()
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            recons = reconstruct(test_images)
            show_recons(test_images, recons)
            plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()

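# `show_recons` is not defined in this snippet. A plausible sketch, assuming it
# wraps `plotting.compare` the same way the inline plotting code in `pretrain`
# does (originals next to reconstructions as 28x28 MNIST digits):
def show_recons(x, z):
    plotting.compare([x.reshape(-1, 28, 28), z.reshape(-1, 28, 28)],
                     rows=5, cols=20)
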
def pretrain(self, batches, dbn=None, test_images=None, n_epochs=10,
             **train_params):
    data = tt.matrix('data', dtype=self.dtype)
    cost, updates = self.get_cost_updates(data, **train_params)
    train_rbm = theano.function([data], cost, updates=updates)

    for epoch in range(n_epochs):
        # train on each mini-batch
        costs = []
        for batch in batches:
            costs.append(train_rbm(batch))
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if dbn is not None and test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            recons = dbn.reconstruct(test_images)
            plotting.compare([test_images.reshape(-1, 28, 28),
                              recons.reshape(-1, 28, 28)], rows=5, cols=20)
            plt.draw()

        # plot filters for first layer only
        if dbn is not None and self is dbn.rbms[0]:
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()

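# Hypothetical usage sketch for greedy layer-wise DBN pretraining: each RBM is
# trained on the output of the layer below, encoding the batches upward after
# each stage. `pretrain_dbn` is not part of this file, and `dbn`, `images`,
# and `test_images` are assumed names:
def pretrain_dbn(dbn, images, test_images, batch_size=100, n_epochs=10):
    batches = images.reshape(-1, batch_size, images.shape[1])
    for rbm in dbn.rbms:
        rbm.pretrain(batches, dbn=dbn, test_images=test_images,
                     n_epochs=n_epochs)
        # encode each batch so the next RBM trains on this layer's codes
        batches = np.array([rbm.encode(batch) for batch in batches])
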
def auto_sgd_down(self, images, test_images=None, batch_size=100,
                  rate=0.1, n_epochs=10):
    dtype = theano.config.floatX

    # untie the decoder weights: each layer gets its own V, initialized to W.T,
    # and only V and b are trained, so the encoder stays fixed
    params = []
    for auto in self.autos:
        auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
        params.extend((auto.V, auto.b))

    # --- compute backprop function
    x = tt.matrix('images')
    xn = x + self.theano_rng.normal(size=x.shape, std=1, dtype=dtype)

    # compute coding error
    y = self.propup(xn)
    z = self.propdown(y)
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    error = tt.mean(rmses)

    # compute gradients
    grads = tt.grad(error, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    for auto in self.autos:
        if auto.mask is not None:
            updates[auto.V] = updates[auto.V] * auto.mask.T

    train_dbn = theano.function([x], error, updates=updates)
    reconstruct = self.reconstruct

    # --- perform SGD
    batches = images.reshape(-1, batch_size, images.shape[1])
    assert np.isfinite(batches).all()

    for epoch in range(n_epochs):
        costs = []
        for batch in batches:
            costs.append(train_dbn(batch))
        # self.check_params()
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            recons = reconstruct(test_images)
            show_recons(test_images, recons)
            plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()

def sgd_backprop(self, images, test_images, batch_size=100, rate=0.1,
                 n_epochs=10):
    dtype = theano.config.floatX
    params = [self.W, self.c, self.b]

    # --- compute backprop function
    x = tt.matrix('images')
    xn = x + self.theano_rng.normal(size=x.shape, std=1, dtype=dtype)

    # compute coding error
    y = self.propup(xn)
    z = self.propdown(y)
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    error = tt.mean(rmses)

    # compute gradients
    grads = tt.grad(error, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    if self.mask is not None:
        updates[self.W] = updates[self.W] * self.mask

    train_dbn = theano.function([x], error, updates=updates)

    # --- perform SGD
    batches = images.reshape(-1, batch_size, images.shape[1])
    assert np.isfinite(batches).all()

    for epoch in range(n_epochs):
        costs = []
        for batch in batches:
            costs.append(train_dbn(batch))
        self.check_params()
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        # plot reconstructions on test set
        plt.figure(2)
        plt.clf()
        x = test_images
        y = self.encode(test_images)
        z = self.decode(y)
        plotting.compare([x.reshape(-1, 28, 28), z.reshape(-1, 28, 28)],
                         rows=5, cols=20, vlims=(-1, 2))
        plt.draw()
        print "Test error:", rms(x - z, axis=1).mean()

        # plot filters for first layer only
        plt.figure(3)
        plt.clf()
        plotting.filters(self.filters, rows=10, cols=20)
        plt.draw()

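# `check_params` is called above but not shown. A minimal sketch, assuming it
# simply guards against NaN/Inf creeping into the shared parameters during SGD
# (matching the np.isfinite assertion applied to the batches):
def check_params(self):
    for param in [self.W, self.c, self.b]:
        assert np.isfinite(param.get_value()).all(), param.name
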
def auto_sgd(self, images, deep=None, test_images=None, batch_size=100,
             rate=0.1, n_epochs=10):
    dtype = theano.config.floatX
    params = [self.W, self.c, self.b]

    # --- compute backprop function
    x = tt.matrix('images')
    xn = x + self.theano_rng.normal(size=x.shape, std=1, dtype=dtype)
    y = self.propup(xn, noise=0.1)
    z = self.propdown(y)

    # compute coding error
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    error = tt.mean(rmses)

    # compute gradients
    grads = tt.grad(error, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    if self.mask is not None:
        updates[self.W] = updates[self.W] * self.mask

    train_dbn = theano.function([x], error, updates=updates)
    # guard against deep=None (the default), which would otherwise crash here
    reconstruct = deep.reconstruct if deep is not None else None

    # --- perform SGD
    batches = images.reshape(-1, batch_size, images.shape[1])
    assert np.isfinite(batches).all()
    print batches.shape

    for epoch in range(n_epochs):
        costs = []
        for batch in batches:
            costs.append(train_dbn(batch))
        self.check_params()
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if deep is not None and test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            recons = reconstruct(test_images)
            show_recons(test_images, recons)
            plt.draw()

        # plot filters for first layer only
        if deep is not None and self is deep.autos[0]:
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()

def f_df_wrapper(p):
    # copy the flat parameter vector back into the Theano shared variables
    for param, value in zip(params, split_params(p, np_params)):
        param.set_value(value.astype(param.dtype))

    outs = f_df()
    cost, grads = outs[0], outs[1:]
    grad = join_params(grads)

    if deep is not None and test_images is not None:
        # plot reconstructions on test set
        plt.figure(2)
        plt.clf()
        recons = reconstruct(test_images)
        show_recons(test_images, recons)
        plt.draw()

    # plot filters for first layer only
    if deep is not None and self is deep.autos[0]:
        plt.figure(3)
        plt.clf()
        plotting.filters(self.filters, rows=10, cols=20)
        plt.draw()

    return cost.astype('float64'), grad.astype('float64')

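# `split_params` and `join_params` are not defined in this snippet. Minimal
# sketches, assuming they convert between a flat float64 vector (the format
# scipy-style optimizers expect) and the list of parameter arrays; `np_params`
# is assumed to be a list of arrays supplying the target shapes:
def join_params(arrays):
    return np.concatenate([a.ravel() for a in arrays])

def split_params(p, np_params):
    arrays, i = [], 0
    for template in np_params:
        arrays.append(p[i:i + template.size].reshape(template.shape))
        i += template.size
    return arrays
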
cost, updates = rbm.get_cost_updates(lr=0.02, persistent=persistent, k=1)
train_rbm = theano.function([rbm.input], cost, updates=updates)

ha, hp = rbm.propup(rbm.input)
va, vp = rbm.propdown(hp)
reconstruct_rbm = theano.function([rbm.input], vp)

for epoch in range(n_epochs):
    costs = []
    for batch in batches:
        costs.append(train_rbm(batch))
    print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

    weights = rbm.W.get_value()
    plt.figure(2)
    plt.clf()
    plotting.filters(weights.T.reshape(-1, 28, 28), rows=5, cols=10)

    test_batch = batches[0]
    recons = reconstruct_rbm(test_batch)
    plt.figure(3)
    plt.clf()
    plotting.compare([test_batch.reshape(-1, 28, 28),
                      recons.reshape(-1, 28, 28)], rows=5, cols=20)
    plt.draw()

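# `persistent` is referenced in the script above but never created in this
# snippet; it would be defined before get_cost_updates is called. For PCD
# (persistent contrastive divergence) it would typically be a shared variable
# holding the fantasy-particle states, one row per Gibbs chain. A sketch,
# assuming `batch_size` chains and `n_hid` hidden units (both names are
# assumptions); for plain CD-1, `persistent=None` is the usual choice:
persistent = theano.shared(
    np.zeros((batch_size, n_hid), dtype=theano.config.floatX),
    name='persistent')
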
def sgd(self, train_set, test_set, rate=0.1, tradeoff=0.5, n_epochs=30,
        batch_size=100):
    """Use SGD to do combined autoencoder and classifier training"""
    dtype = theano.config.floatX
    assert 0 <= tradeoff <= 1

    params = []
    for auto in self.autos:
        auto.V = theano.shared(auto.W.get_value(borrow=False).T, name='V')
        params.extend([auto.W, auto.V, auto.c, auto.b])

    # --- compute backprop function
    assert self.W is not None and self.b is not None
    W = theano.shared(self.W.astype(dtype), name='Wc')
    b = theano.shared(self.b.astype(dtype), name='bc')

    x = tt.matrix('batch')
    y = tt.ivector('labels')
    xn = x
    # xn = x + self.theano_rng.normal(size=x.shape, std=0.1, dtype=dtype)
    yn = self.propup(xn, noise=1.0)

    # compute classification error
    # p_y_given_x = tt.nnet.softmax(tt.dot(yn, W) + b)
    # y_pred = tt.argmax(p_y_given_x, axis=1)
    # nll = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
    # class_error = tt.mean(tt.neq(y_pred, y))
    yc = tt.dot(yn, W) + b
    class_cost = multi_hinge_margin(yc, y).mean()
    class_error = tt.mean(tt.neq(tt.argmax(yc, axis=1), y))

    # compute autoencoder error
    z = self.propdown(yn)
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    auto_cost = tt.mean(rmses)

    cost = (tt.cast(1 - tradeoff, dtype) * auto_cost +
            tt.cast(tradeoff, dtype) * class_cost)
    error = class_error

    # compute gradients
    grads = tt.grad(cost, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    for auto in self.autos:
        if auto.mask is not None:
            updates[auto.W] = updates[auto.W] * auto.mask
            updates[auto.V] = updates[auto.V] * auto.mask.T

    train_dbn = theano.function([x, y], error, updates=updates)
    reconstruct = self.reconstruct

    # --- perform SGD
    images, labels = train_set
    ibatches = images.reshape(-1, batch_size, images.shape[1])
    lbatches = labels.reshape(-1, batch_size).astype('int32')
    assert np.isfinite(ibatches).all()

    test_images, test_labels = test_set

    for epoch in range(n_epochs):
        costs = []
        for batch, label in zip(ibatches, lbatches):
            costs.append(train_dbn(batch, label))

        # copy back parameters (for test function)
        self.W = W.get_value()
        self.b = b.get_value()

        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            recons = reconstruct(test_images)
            show_recons(test_images, recons)
            plt.draw()

            # plot filters for first layer only
            plt.figure(3)
            plt.clf()
            plotting.filters(self.autos[0].filters, rows=10, cols=20)
            plt.draw()

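# `multi_hinge_margin` is imported from elsewhere and not shown. A minimal
# Theano sketch of a multi-class hinge loss, assuming a margin of 1 between
# the true-class score and the strongest competitor (the exact formulation of
# the original helper may differ):
def multi_hinge_margin(scores, labels, margin=1.0):
    idx = tt.arange(labels.shape[0])
    true_scores = scores[idx, labels]
    # margin violations of every wrong class relative to the true class
    costs = scores - true_scores.dimshuffle(0, 'x') + margin
    costs = tt.set_subtensor(costs[idx, labels], 0)
    return tt.maximum(0, costs).max(axis=1)
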
def auto_sgd(self, images, deep=None, test_images=None, batch_size=100,
             rate=0.1, noise=1., n_epochs=10):
    assert not hasattr(self, 'V')
    dtype = theano.config.floatX
    params = [self.W, self.c, self.b]

    # --- compute backprop function
    x = tt.matrix('images')
    xn = x + self.theano_rng.normal(size=x.shape, std=noise, dtype=dtype)
    y = self.propup(xn)
    z = self.propdown(y)

    # compute coding error
    rmses = tt.sqrt(tt.mean((x - z)**2, axis=1))
    error = tt.mean(rmses)

    # compute gradients
    grads = tt.grad(error, params)
    updates = collections.OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - tt.cast(rate, dtype) * grad

    if self.mask is not None:
        updates[self.W] = updates[self.W] * self.mask

    train_dbn = theano.function([x], error, updates=updates)

    # reconstruct = deep.reconstruct if deep is not None else None
    encode = deep.encode if deep is not None else None
    decode = deep.decode if deep is not None else None

    # --- perform SGD
    batches = images.reshape(-1, batch_size, images.shape[1])
    assert np.isfinite(batches).all()

    for epoch in range(n_epochs):
        costs = []
        for batch in batches:
            costs.append(train_dbn(batch))
        self.check_params()
        print "Epoch %d: %0.3f" % (epoch, np.mean(costs))

        if deep is not None and test_images is not None:
            # plot reconstructions on test set
            plt.figure(2)
            plt.clf()
            test = test_images
            codes = encode(test)
            recs = decode(codes)
            # recons = reconstruct(test_images)
            show_recons(test, recs)
            plt.draw()
            print "Test set: (error: %0.3f) (sparsity: %0.3f)" % (
                rms(test - recs, axis=1).mean(), (codes > 0).mean())

        # plot filters for first layer only
        if deep is not None and self is deep.autos[0]:
            plt.figure(3)
            plt.clf()
            plotting.filters(self.filters, rows=10, cols=20)
            plt.draw()
