def __init__(self, layerID, inputSize, kernelSize, downsampleFactor,
             learningRate=0.001, dropout=None, contractionRate=None,
             initialWeights=None, initialHidThresh=None,
             initialVisThresh=None, activation=t.nnet.sigmoid,
             randomNumGen=None):
    '''Initialize the convolutional encoder, then the autoencoder state.

       Most arguments are forwarded unchanged to
       ConvolutionalLayer.__init__ (initialHidThresh is passed there as
       initialThresholds).

       contractionRate  : value handed to AutoEncoder.__init__; when None
                          it defaults to 1 / product(kernelSize)
       initialVisThresh : initial values for the backward (visible)
                          thresholds; when None a zero vector of length
                          self._inputSize[1] is used
    '''
    # encoder half -- the regular convolutional layer
    ConvolutionalLayer.__init__(self,
                                layerID=layerID,
                                inputSize=inputSize,
                                kernelSize=kernelSize,
                                downsampleFactor=downsampleFactor,
                                learningRate=learningRate,
                                dropout=dropout,
                                initialWeights=initialWeights,
                                initialThresholds=initialHidThresh,
                                activation=activation,
                                randomNumGen=randomNumGen)

    # pick the contraction rate, falling back to the reciprocal of the
    # number of kernel elements when the caller did not supply one
    if contractionRate is None:
        rate = 1. / np.prod(kernelSize[:])
    else:
        rate = contractionRate
    AutoEncoder.__init__(self, rate)

    # setup initial values for the visible (decoder-side) thresholds
    visibleThresholds = initialVisThresh
    if visibleThresholds is None:
        visibleThresholds = np.zeros((self._inputSize[1], ),
                                     dtype=config.floatX)
    self._thresholdsBack = shared(value=visibleThresholds, borrow=True)
def __init__(self, layerID, inputSize, kernelSize, downsampleFactor,
             regType=None, learningRate=0.001, momentumRate=0.9,
             dropout=None, contractionRate=None, initialWeights=None,
             initialHidThresh=None, initialVisThresh=None,
             activation=t.nnet.sigmoid, forceSparsity=True,
             randomNumGen=None):
    '''Initialize the convolutional encoder, the autoencoder state and
       the regularization helper.

       Most arguments are forwarded unchanged to
       ConvolutionalLayer.__init__ (initialHidThresh is passed there as
       initialThresholds).

       regType          : handed to nn.reg.Regularization together with
                          the resolved contraction rate
       contractionRate  : value handed to AutoEncoder.__init__; when None
                          it defaults to 1 / product(kernelSize)
       forceSparsity    : handed to AutoEncoder.__init__
       initialVisThresh : initial values for the backward (visible)
                          thresholds; when None a zero vector of length
                          self._inputSize[1] is used
    '''
    from nn.reg import Regularization

    # encoder half -- the regular convolutional layer
    ConvolutionalLayer.__init__(self,
                                layerID=layerID,
                                inputSize=inputSize,
                                kernelSize=kernelSize,
                                downsampleFactor=downsampleFactor,
                                learningRate=learningRate,
                                momentumRate=momentumRate,
                                dropout=dropout,
                                initialWeights=initialWeights,
                                initialThresholds=initialHidThresh,
                                activation=activation,
                                randomNumGen=randomNumGen)

    # pick the contraction rate, falling back to the reciprocal of the
    # number of kernel elements when the caller did not supply one
    if contractionRate is None:
        rate = 1. / np.prod(kernelSize[:])
    else:
        rate = contractionRate
    AutoEncoder.__init__(self, forceSparsity, rate)

    # setup initial values for the visible (decoder-side) thresholds
    visibleThresholds = initialVisThresh
    if visibleThresholds is None:
        visibleThresholds = np.zeros((self._inputSize[1],),
                                     dtype=config.floatX)
    self._thresholdsBack = shared(value=visibleThresholds, borrow=True)

    # NOTE: reads self._contractionRate, which is presumably stored by
    #       AutoEncoder.__init__ above -- confirm against that class
    self._regularization = Regularization(regType, self._contractionRate)
def finalize(self, networkInput, layerInput):
    '''Build the computation graph for this layer: the encoder (via
       ConvolutionalLayer.finalize), the decoder, the cost terms and the
       gradient-descent updates.

       networkInput : the input variable tuple for the network
                      format (inClass, inTrain)
       layerInput   : the input variable tuple for this layer
                      format (inClass, inTrain)
    '''
    from nn.costUtils import calcLoss, leastSquares, calcSparsityConstraint

    ConvolutionalLayer.finalize(self, networkInput, layerInput)
    decoderWeights = self._getWeightsBack()

    # decoder --
    # undo the pooling on the second output path and push the result back
    # through the layer in reverse. The reconstruction is what lets us
    # measure how well the layer encodes its input.
    unpooled = self._unpool_2d(self.output[1], self._downsampleFactor)
    reconstructed = self._decode(unpooled)

    # DEBUG: For Debugging purposes only
    self.reconstruction = function([networkInput[0]], reconstructed)

    # sparsity term computed on the first output path
    sparsityCost = calcSparsityConstraint(self.output[0],
                                          self.getOutputSize())

    # jacobian (contraction) term --
    # acts as a sparsity constraint in case the hidden vector is larger
    # than the input vector.
    jacobian = conv2d(unpooled * (1 - unpooled),
                      decoderWeights,
                      self.getFeatureSize(),
                      decoderWeights.shape.eval(),
                      border_mode='full')
    contractionCost = leastSquares(jacobian,
                                   self._inputSize[0],
                                   self._contractionRate)

    # reconstruction loss with respect to the original input --
    # NOTE(review): an earlier note here said the jacobian term had been
    #               removed because it was slow and did not help, but it
    #               is still part of self._costs below -- confirm intent.
    reconstructionCost = calcLoss(self.input[0], reconstructed,
                                  self._activation) / \
                         self.getInputSize()[0]
    self._costs = [reconstructionCost, contractionCost, sparsityCost]

    # plain gradient descent over every weight this layer exposes
    gradients = t.grad(t.sum(self._costs), self.getWeights())
    updates = []
    for param, grad in zip(self.getWeights(), gradients):
        updates.append((param, param - self._learningRate * grad))
    self._updates = updates

    # TODO: this needs to be stackable and take the input to the first
    #       layer, not just the input of this layer. This will ensure
    #       the other layers are activated to get the input to this layer
    # DEBUG: For Debugging purposes only
    self.trainLayer = function([networkInput[0]], self._costs,
                               updates=self._updates)
def __setstate__(self, dict):
    '''Restore this layer from its pickled state.

       dict : the state mapping, passed on to
              ConvolutionalLayer.__setstate__
    '''
    from theano import shared

    # drop any compiled functions / graph pieces still hanging on the
    # object so they are forced to be rebuilt against the new buffers
    for staleName in ('reconstruction', '_costs', '_updates', 'trainLayer'):
        if hasattr(self, staleName):
            delattr(self, staleName)

    ConvolutionalLayer.__setstate__(self, dict)

    # re-wrap the backward thresholds in a shared variable
    # NOTE: presumably the parent call above leaves the raw (non-shared)
    #       values in self._thresholdsBack -- confirm against the parent
    rawThresholds = self._thresholdsBack
    self._thresholdsBack = shared(value=rawThresholds, borrow=True)
def __setstate__(self, dict):
    '''Restore this layer from its pickled state.

       dict : the state mapping, passed on to
              ConvolutionalLayer.__setstate__
    '''
    from theano import shared

    # drop any compiled functions / graph pieces still hanging on the
    # object so they are forced to be rebuilt against the new buffers
    for staleName in ('reconstruction', '_costs', '_costLabels',
                      '_updates', 'trainLayer'):
        if hasattr(self, staleName):
            delattr(self, staleName)

    ConvolutionalLayer.__setstate__(self, dict)

    # re-wrap the backward thresholds in a shared variable
    # NOTE: presumably the parent call above leaves the raw (non-shared)
    #       values in self._thresholdsBack -- confirm against the parent
    rawThresholds = self._thresholdsBack
    self._thresholdsBack = shared(value=rawThresholds, borrow=True)
def __getstate__(self):
    '''Produce the picklable state for this layer.

       Starts from ConvolutionalLayer.__getstate__, stores the backward
       thresholds converted through fromShared, and strips the entries
       that get rebuilt later.
    '''
    from dataset.shared import fromShared

    state = ConvolutionalLayer.__getstate__(self)
    state['_thresholdsBack'] = fromShared(self._thresholdsBack)

    # remove the functions -- they will be rebuilt JIT
    for rebuiltName in ('reconstruction', '_costs', '_updates',
                        'trainLayer'):
        if rebuiltName in state:
            del state[rebuiltName]
    return state
def __getstate__(self):
    '''Produce the picklable state for this layer.

       Starts from ConvolutionalLayer.__getstate__, stores the backward
       thresholds converted through fromShared, and strips the entries
       that get rebuilt later.
    '''
    from dataset.shared import fromShared

    state = ConvolutionalLayer.__getstate__(self)
    state['_thresholdsBack'] = fromShared(self._thresholdsBack)

    # remove the functions -- they will be rebuilt JIT
    for rebuiltName in ('reconstruction', '_costs', '_costLabels',
                        '_updates', 'trainLayer'):
        if rebuiltName in state:
            del state[rebuiltName]
    return state
def createNetwork(inputSize, numKernels, numNeurons, numLabels):
    '''Assemble a small classifier network (one convolutional layer and
       two fully connected layers), save it to disk and return the path.

       inputSize  : input size of the first layer; inputSize[1] is used
                    as the second entry of the kernel size
       numKernels : first entry of the convolutional kernel size
       numNeurons : neuron count of the first fully connected layer
       numLabels  : neuron count of the final layer
       return     : the path the network was saved to
    '''
    from nn.net import ClassifierNetwork
    from six.moves import reduce

    localPath = './local.pkl.gz'
    network = ClassifierNetwork()

    # per-layer learning and momentum rates, in layer order
    convLearn, hiddenLearn, outputLearn = [.08, .05, .02]
    convMomentum, hiddenMomentum, outputMomentum = [.8, .8, .8]

    # add convolutional layers
    convLayer = ConvolutionalLayer(
        layerID='c1',
        inputSize=inputSize,
        kernelSize=(numKernels, inputSize[1], 3, 3),
        downsampleFactor=(3, 3),
        randomNumGen=rng,
        learningRate=convLearn,
        momentumRate=convMomentum)
    network.addLayer(convLayer)

    # add fully connected layers --
    # keep the leading dimension and collapse the remaining ones into one
    leadingDim = network.getNetworkOutputSize()[0]
    flattenedDim = reduce(mul, network.getNetworkOutputSize()[1:])
    hiddenLayer = ContiguousLayer(
        layerID='f2',
        inputSize=(leadingDim, flattenedDim),
        numNeurons=numNeurons,
        randomNumGen=rng,
        learningRate=hiddenLearn,
        momentumRate=hiddenMomentum)
    network.addLayer(hiddenLayer)

    outputLayer = ContiguousLayer(
        layerID='f3',
        inputSize=network.getNetworkOutputSize(),
        numNeurons=numLabels,
        learningRate=outputLearn,
        momentumRate=outputMomentum,
        activation=None,
        randomNumGen=rng)
    network.addLayer(outputLayer)

    # save it to disk in order to load it into both networks
    network.save(localPath)
    return localPath
def createNetwork(inputSize, numKernels, numNeurons, numLabels):
    '''Assemble a small network (one convolutional layer and two fully
       connected layers) and return it.

       inputSize  : input size of the first layer; inputSize[1] is used
                    as the second entry of the kernel size
       numKernels : first entry of the convolutional kernel size
       numNeurons : neuron count of the first fully connected layer
       numLabels  : neuron count of the final layer
       return     : the populated network object
    '''
    # reduce is only a builtin on Python 2; importing it from functools
    # works on both Python 2.6+ and Python 3
    from functools import reduce
    from numpy.random import RandomState
    from operator import mul

    # one generator, seeded from the clock, shared by every layer
    rng = RandomState(int(time()))

    trainSize = inputSize

    # create the network
    network = Net()

    # add convolutional layers
    network.addLayer(
        ConvolutionalLayer(layerID='c1',
                           inputSize=trainSize,
                           kernelSize=(numKernels, trainSize[1], 7, 7),
                           downsampleFactor=(2, 2),
                           randomNumGen=rng,
                           learningRate=.09,
                           momentumRate=.9))

    # add fully connected layers --
    # keep the leading dimension and collapse the remaining ones into one
    network.addLayer(
        ContiguousLayer(layerID='f2',
                        inputSize=(network.getNetworkOutputSize()[0],
                                   reduce(mul,
                                          network.getNetworkOutputSize()[1:])),
                        numNeurons=numNeurons,
                        randomNumGen=rng,
                        learningRate=.03,
                        momentumRate=.7))
    network.addLayer(
        ContiguousLayer(layerID='f3',
                        inputSize=network.getNetworkOutputSize(),
                        numNeurons=numLabels,
                        learningRate=.01,
                        momentumRate=.7,
                        activation=None,
                        randomNumGen=rng))

    return network
(2 * options.kernel * 5 * 5 + options.neuron + labels.shape[0]), prof=prof) if options.synapse is not None: # load a previously saved network network.load(options.synapse) else: log.info('Initializing Network...') # add convolutional layers network.addLayer( ConvolutionalLayer(layerID='c1', inputSize=trainSize[1:], kernelSize=(options.kernel, trainSize[2], 5, 5), downsampleFactor=(2, 2), learningRate=options.learnC, momentumRate=options.momentum, dropout=.8 if options.dropout else 1., activation=t.nnet.relu, randomNumGen=rng)) # refactor the output to be (numImages*numKernels, 1, numRows, numCols) # this way we don't combine the channels kernels we created in # the first layer and destroy our dimensionality network.addLayer( ConvolutionalLayer(layerID='c2', inputSize=network.getNetworkOutputSize(), kernelSize=(options.kernel, options.kernel, 5, 5), downsampleFactor=(2, 2), learningRate=options.learnC,
def finalize(self, networkInput, layerInput):
    '''Build the computation graph for this layer: the encoder (via
       ConvolutionalLayer.finalize), the decoder, the labelled cost terms
       and the weight updates.

       networkInput : the input variable tuple for the network
                      format (inClass, inTrain)
       layerInput   : the input variable tuple for this layer
                      format (inClass, inTrain)
    '''
    from nn.costUtils import calcLoss, leastSquares, \
                             calcSparsityConstraint, compileUpdate
    from dataset.shared import getShape

    ConvolutionalLayer.finalize(self, networkInput, layerInput)
    decoderWeights = self._getWeightsBack()

    # costs and their display labels are appended in lock-step below
    self._costs = []
    self._costLabels = []

    # decoder --
    # feed the layer output back through the layer in reverse. The
    # reconstruction is what lets us measure how well the layer encodes
    # its input.
    reconstructed = self.buildDecoder(self.output[0])

    # DEBUG: For Debugging purposes only
    self.reconstruction = function([networkInput[0]], reconstructed)

    # NOTE: Sparsity is not a useful constraint on convolutional layers

    # contraction is only applicable in the non-binary case
    if not self._forceSparse:
        # jacobian (contraction) term --
        # acts as a sparsity constraint in case the hidden vector is
        # larger than the input vector.
        unpooled = self._unpool_2d(self.output[0], self._downsampleFactor)
        jacobian = conv2d(unpooled * (1. - unpooled),
                          decoderWeights,
                          self.getFeatureSize(),
                          tuple(decoderWeights.shape.eval()),
                          border_mode='full')
        self._costs.append(leastSquares(jacobian, self._contractionRate))
        self._costLabels.append('Jacob')

    # add regularization if it was user requested
    regularizationCost = self._regularization.calculate([self])
    if regularizationCost is not None:
        self._costs.append(regularizationCost)
        self._costLabels.append('Regularization')

    # reconstruction loss with respect to the original input --
    # NOTE: an earlier note here reported that the jacobian takes much
    #       longer to process without helping convergence or
    #       regularization; it is now only added in the branch above.
    reconstructionCost = calcLoss(
        self.input[0], reconstructed, self._activation,
        scaleFactor=1. / self.getInputSize()[1])
    self._costs.append(reconstructionCost)
    self._costLabels.append('Local Cost')

    # differentiate the summed costs, scaled by the first dimension of
    # the network input, with respect to every weight of this layer
    scaledCost = t.sum(self._costs) / getShape(networkInput[0])[0]
    gradients = t.grad(scaledCost, self.getWeights())
    self._updates = compileUpdate(self.getWeights(), gradients,
                                  self._learningRate, self._momentumRate)

    # TODO: this needs to be stackable and take the input to the first
    #       layer, not just the input of this layer. This will ensure
    #       the other layers are activated to get the input to this layer
    # DEBUG: For Debugging purposes only
    self.trainLayer = function([networkInput[0]], self._costs,
                               updates=self._updates)