def __init__(self, rng, hiddenLayerList, n_out):
    """Initialize the parameters for the multilayer perceptron

    NOTE(review): this module-level function appears to duplicate
    ``MLP.__init__`` defined right after it -- confirm whether it is still
    needed.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights
                (not used by this function; kept so the signature stays
                unchanged)

    :type hiddenLayerList: [HiddenLayer instances]
    :param hiddenLayerList: A non-empty list of hidden layers. The layers
                            are expected to be connected to each other
                            already (each layer's input is the previous
                            layer's output); no wiring is done here.

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :raises IndexError: if ``hiddenLayerList`` is empty
    """
    # Hidden layers are already connected outside when they are built.
    self.hiddenLayers = hiddenLayerList
    lastHidden = hiddenLayerList[-1]

    # The logistic regression layer gets as input the output of the last
    # hidden layer.
    # presumably inOutDim is an (n_in, n_out) pair -- TODO confirm
    self.logRegressionLayer = LogisticRegression(
        input=lastHidden.output,
        n_in=lastHidden.inOutDim[1],
        n_out=n_out)

    # symbolic variables for data
    self.X = self.hiddenLayers[0].input  # training data
    self.y = T.bvector('y')  # labels for training data

    # L1 norm; one regularization option is to enforce L1 norm to be small
    self.L1 = abs(self.logRegressionLayer.W).sum()
    for ly in self.hiddenLayers:
        self.L1 += abs(ly.W).sum()

    # square of L2 norm; one regularization option is to enforce square of
    # L2 norm to be small
    self.L2_sqr = (self.logRegressionLayer.W ** 2).sum()
    for ly in self.hiddenLayers:
        self.L2_sqr += (ly.W ** 2).sum()

    # negative log likelihood of the MLP is given by the negative log
    # likelihood of the output of the model, computed in the logistic
    # regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # The parameters of the model are the parameters of all the layers.
    # Copy into a fresh list: extending logRegressionLayer.params directly
    # would silently add the hidden layers' parameters to that layer too.
    self.params = list(self.logRegressionLayer.params)
    for ly in self.hiddenLayers:
        self.params.extend(ly.params)
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one layer or more of hidden units and nonlinear activations.
    Intermediate layers usually have as activation function tanh or the
    sigmoid function (defined here by a ``HiddenLayer`` class) while the
    top layer is a softmax layer (defined here by a ``LogisticRegression``
    class).
    """

    def __init__(self, rng, hiddenLayerList, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights
                    (not used by this constructor; kept so the signature
                    stays unchanged)

        :type hiddenLayerList: [HiddenLayer instances]
        :param hiddenLayerList: A non-empty list of hidden layers. The
                                layers are expected to be connected to each
                                other already (each layer's input is the
                                previous layer's output); no wiring is done
                                here.

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        :raises IndexError: if ``hiddenLayerList`` is empty
        """
        # Hidden layers are already connected outside when they are built.
        self.hiddenLayers = hiddenLayerList
        lastHidden = hiddenLayerList[-1]

        # The logistic regression layer gets as input the output of the
        # last hidden layer.
        # presumably inOutDim is an (n_in, n_out) pair -- TODO confirm
        self.logRegressionLayer = LogisticRegression(
            input=lastHidden.output,
            n_in=lastHidden.inOutDim[1],
            n_out=n_out)

        # symbolic variables for data
        self.X = self.hiddenLayers[0].input  # training data
        self.y = T.bvector('y')  # labels for training data

        # L1 norm; one regularization option is to enforce L1 norm to be
        # small
        self.L1 = abs(self.logRegressionLayer.W).sum()
        for ly in self.hiddenLayers:
            self.L1 += abs(ly.W).sum()

        # square of L2 norm; one regularization option is to enforce square
        # of L2 norm to be small
        self.L2_sqr = (self.logRegressionLayer.W ** 2).sum()
        for ly in self.hiddenLayers:
            self.L2_sqr += (ly.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative log
        # likelihood of the output of the model, computed in the logistic
        # regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # The parameters of the model are the parameters of all the layers.
        # Copy into a fresh list: extending logRegressionLayer.params
        # directly would silently add the hidden layers' parameters to that
        # layer too.
        self.params = list(self.logRegressionLayer.params)
        for ly in self.hiddenLayers:
            self.params.extend(ly.params)

    def predictProba(self, inputs):
        """Propagate ``inputs`` through every layer and return the result.

        Each hidden layer's ``activate`` is called in order, feeding it the
        ``output`` attribute of the previous layer; the last hidden output
        is then passed to the logistic regression layer's ``activate``.

        :param inputs: data handed to the first hidden layer's ``activate``

        :return: whatever ``self.logRegressionLayer.activate`` returns
                 (presumably class probabilities -- TODO confirm)

        :raises IndexError: if the network has no hidden layer
        """
        prevLy = self.hiddenLayers[0]
        prevLy.activate(inputs)
        for ly in self.hiddenLayers[1:]:
            ly.activate(prevLy.output)
            prevLy = ly
        # Use prevLy rather than the loop variable: with a single hidden
        # layer the loop body never runs and the loop variable is unbound.
        return self.logRegressionLayer.activate(prevLy.output)

    def buildTrainFunc(self, trSetX, trSetY, batchSize, default_learningRate=0.1, L1_reg=0, L2_reg=0):
        '''Generates the training function.

        The returned function takes a minibatch index, performs one
        gradient-descent step on every parameter in ``self.params`` and
        returns the regularized cost for that minibatch.

        :param trSetX: training inputs, sliced per minibatch and substituted
                       for ``self.X`` (presumably a Theano shared variable
                       -- TODO confirm)

        :param trSetY: training labels, sliced per minibatch and substituted
                       for ``self.y``, which is declared as ``T.bvector``

        :type batchSize: int
        :param batchSize: size of a minibatch

        :type default_learningRate: float
        :param default_learningRate: Initial learning rate

        :type L1_reg: float
        :param L1_reg: regularization parameter for L1

        :type L2_reg: float
        :param L2_reg: regularization parameter for L2

        :return: compiled Theano function ``trainFunc(index) -> cost``
        '''
        index = T.lscalar('index')  # index to a [mini]batch

        # Learning rate lives in a shared variable stored on the instance.
        # NOTE: a per-epoch decay of the learning rate was sketched here
        # previously but is disabled; no update is registered for it.
        self.learningRate = theano.shared(float32(default_learningRate), 'lr')

        # cost function: data term plus optional L1 / L2 penalties
        cost = (self.negative_log_likelihood(self.y)
                + L1_reg * self.L1
                + L2_reg * self.L2_sqr)

        # gradient of cost with respect to each parameter of the network
        gparams = [T.grad(cost, param) for param in self.params]

        # plain gradient-descent step for every parameter
        updates = [(param, param - self.learningRate * gparam)
                   for param, gparam in zip(self.params, gparams)]

        batchStart = index * batchSize
        batchStop = (index + 1) * batchSize
        trainFunc = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={self.X: trSetX[batchStart:batchStop],
                    self.y: trSetY[batchStart:batchStop]})

        return trainFunc