import numpy as np
import theano
import theano.tensor as T
from theano import shared

from nylearn.nnet import MLP
# Layer, LeNetConvPoolLayer and LogisticRegression are other nylearn
# components assumed to live elsewhere in this package.


class LeNet(Layer):
    """Convolutional network: a stack of convolution-pooling layers
    followed by a fully connected MLP classifier."""

    def __init__(self, input_size, conv_layers, hidden_layers, output_size,
                 output_layer=LogisticRegression, conv_filter_size=5,
                 conv_pool_size=2, lamda=0):
        """
        Parameters
        ------
        input_size: int
            The width of the input image (the input image is assumed
            to be square).
        conv_layers: list
            The number of feature maps in each convolution layer.
        hidden_layers: list
            The number of units in each hidden layer of the fully
            connected MLP.
        output_size: int
            The number of output layer units.
        output_layer: object
            The class used for the output layer.
        conv_filter_size: int
            The width of the (square) convolution filters.
        conv_pool_size: int
            The downsampling (pooling) factor.
        lamda: float
            L2 regularization strength. Set to 0 to disable
            regularization.
        """
        self.lamda = shared(lamda, name='lamda')
        image_shape = (None, 1, input_size, input_size)
        self.conv_layers = []
        for c in conv_layers:
            filter_shape = (c, image_shape[1],
                            conv_filter_size, conv_filter_size)
            cp = LeNetConvPoolLayer(filter_shape, image_shape,
                                    lamda, conv_pool_size)
            image_shape = cp.output_shape  # input shape of the next layer
            self.conv_layers.append(cp)
        # The MLP consumes the flattened output of the last conv-pool layer.
        self.mlp = MLP(np.prod(image_shape[1:]), hidden_layers, output_size,
                       output_layer=output_layer,
                       lamda=self.lamda.get_value())
        self.layers = self.conv_layers + [self.mlp]

    @property
    def theta(self):
        """All parameters, flattened into a single vector."""
        return np.hstack([l.theta for l in self.layers])

    @theta.setter
    def theta(self, val):
        start = 0
        for l in self.layers:
            size = np.prod(l.theta.shape)
            end = start + size
            l.theta = val[start:end]
            start = end

    def _cost(self, X, y):
        return self.mlp._cost(X, y)

    def _gradient(self, cost):
        # Theano is awesome: it differentiates the whole graph symbolically,
        # so each layer only has to ask for its share of the gradient.
        return T.concatenate([l._gradient(cost) for l in self.layers])

    def _l2_regularization(self, m):
        return T.sum([l._l2_regularization(m) for l in self.layers])

    def _feedforward(self, X):
        input = X
        for c in self.conv_layers:
            input = c.output(input)
        return self.mlp._feedforward(input.flatten(2))

    def _cost_and_gradient(self, X, y):
        X, y = self.preprocess(X, y)
        m = y.shape[0]
        # assert m.eval() == self.batch_size
        reg = self._l2_regularization(m)
        J = self._cost(X, y) + reg
        grad = self._gradient(J)
        return J, grad

    def preprocess(self, X, y=None):
        if y is not None:
            y = y.flatten()  # make sure that y is a vector
        input_shape = list(self.layers[0].image_shape)
        input_shape[0] = -1  # batch size is not a constant
        X = X.reshape(input_shape)
        output_layer_input = self.add_bias(self._feedforward(X))
        return output_layer_input, y

    def predict(self, dataset):
        """Return predicted class labels as a numpy vector.

        Parameters
        ------
        dataset: dataset
        """
        X, _ = self.preprocess(dataset.X)
        y = theano.function([], self._predict_y(X))
        return y()

    def errors(self, dataset):
        """Return the error rate as a float.

        Parameters
        ------
        dataset: dataset
        """
        X, y = self.preprocess(dataset.X, dataset.y)
        e = theano.function([], self._errors(X, y))
        return e()

    def _predict_y(self, X):
        return self.mlp.output(X)

    def _errors(self, X, y):
        """Compute the rate of predict_y_i != y_i.

        Parameters
        ------
        X: tensor like
            feature matrix
        y: tensor like
            class label
        """
        return T.mean(T.neq(self._predict_y(X), y))
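# Usage sketch (illustrative, not part of the library): a LeNet for 28x28
# MNIST digits. With the default 5x5 filters and 2x2 pooling, the feature
# maps shrink 28 -> 12 -> 4, so the MLP input is 50 * 4 * 4 = 800 units.
# The layer sizes below are assumptions chosen for the example, not values
# prescribed by nylearn.
if __name__ == '__main__':
    net = LeNet(input_size=28,         # images are 28x28 and square
                conv_layers=[20, 50],  # feature maps per conv-pool stage
                hidden_layers=[500],   # one fully connected hidden layer
                output_size=10,        # ten digit classes
                lamda=0.01)            # hypothetical L2 strength
    print(net.theta.shape)             # total number of parameters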
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

from mnist.mnist import get_training_set, get_test_set
from nylearn.nnet import MLP
from nylearn.dataset import Dataset
from nylearn.train import ConjugateGradientDescent as cg
from nylearn.train import MinibatchGradientDescent as mbgd
from nylearn.train import momentum, decay

tr_x, tr_y = get_training_set(60000)
te_x, te_y = get_test_set(10000)
# Scale pixels to [0, 1) and hold out the last 10000 training examples
# for validation.
training_set = Dataset(tr_x[:50000]/256, tr_y[:50000])
validation_set = Dataset(tr_x[50000:]/256, tr_y[50000:])
test_set = Dataset(te_x/256, te_y)

m = momentum(0.5, 0.99, end=400)  # momentum schedule
d = decay(0.99, 10)               # learning rate decay schedule

nn = MLP(tr_x.shape[1], [500], 10, lamda=0.01)
tn = mbgd(nn)
last = tn.train(training_set, maxiter=600, batch_size=50, eta=0.01,
                validation_set=validation_set, momentum=m, adjust_eta=d)
print('validation set error: {}, test set error: {}'.format(
    nn.errors(validation_set), nn.errors(test_set)))
nn.save('mnist/nn-500-10')

# `train` returns the parameters from the last iteration; compare their
# test error with that of the parameters `nn` ended up with.
nn.theta = last
print('last theta test set error: {}'.format(nn.errors(test_set)))
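# A follow-up sketch (assumptions, not part of the original script): the
# same minibatch trainer should be able to drive the LeNet class defined
# above, since it exposes the same theta/_cost_and_gradient/errors
# interface as MLP. The import path `nylearn.lenet` is a guess and the
# hyperparameters are illustrative, so the block is left commented out.
#
# from nylearn.lenet import LeNet   # hypothetical module path
#
# cnn = LeNet(28, [20, 50], [500], 10, lamda=0.01)
# tn = mbgd(cnn)
# tn.train(training_set, maxiter=200, batch_size=50, eta=0.1,
#          validation_set=validation_set, momentum=m)
# print('LeNet test set error: {}'.format(cnn.errors(test_set)))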