def doForwardPropagation_asLayer(self, X, theta):
    '''
    Wrapper function for doForwardPropagation for cases where the Softmax
    model is used as a layer of a deep network.

    Arguments
    X      : data matrix in the form [input dim., number of samples]
    theta  : model parameters for the first layer, must be packed as [weights+biases]

    Returns
    y      : the layer activation converted to probabilities, i.e. the
             exponential of the max-shifted activation divided by its sum
             taken along axis 0
    '''
    # All three messages carry the same tag so that a failure is attributed
    # to this wrapper and not to the plain doForwardPropagation method.
    assert self.isInitialized, 'ERROR:Softmax:doForwardPropagationAsLayer: The instance is not properly initialized'
    assert X.shape[0] == self.nParams, 'ERROR:Softmax:doForwardPropagationAsLayer: Dimensions of given data do not match with the number of parameters'
    assert np.size(theta) == self.thetaMatrixPrototype[0] * self.thetaMatrixPrototype[1], 'ERROR:Softmax:doForwardPropagationAsLayer: Dimensions of given weights do not match the internal structure'

    weights = self.unrollParameters(theta)
    activation = self.doForwardPropagation(X, weights)

    # Convert output to probabilities. The maximum along axis 0 is subtracted
    # first so that the exponential cannot overflow.
    shifted = AuxFunctions.doUnbalancedMatrixOperation(activation, np.amax(activation, 0), 'sub', axis=0)
    exponentials = np.exp(shifted)

    # Normalise by the sum along axis 0.
    y = AuxFunctions.doUnbalancedMatrixOperation(exponentials, np.sum(exponentials, 0), 'div', axis=0)

    return y
def l2rowscaledg(self, x, y, outderv, alpha):
    '''
    Back-projects weight gradients from the norm ball to the original
    weight space.

    Arguments
    x        : Old weight matrix
    y        : Weight matrix projected to norm ball; when it is empty
               (len(y) == 0) it is recomputed from x
    outderv  : Gradients projected to norm ball
    alpha    : Scale factor

    Returns
    Weight gradient matrix back-projected to the original weight space
    '''
    normeps = 1e-5

    # Squared row norms of x, offset by a small constant, and the
    # corresponding scaled row norms.
    rowSumSq = np.sum(x**2, 1) + normeps
    rowNorms = np.sqrt(rowSumSq) * alpha

    if len(y) == 0:
        y = AuxFunctions.doUnbalancedMatrixOperation(x, rowNorms, 'div')

    scaledGrad = AuxFunctions.doUnbalancedMatrixOperation(outderv, rowNorms, 'div')

    # Per-row factor applied to the projected weights.
    rowFactor = np.sum(outderv * x, 1) / rowSumSq
    correction = AuxFunctions.doUnbalancedMatrixOperation(y, rowFactor, 'mul')

    return scaledGrad - correction
def doForwardPropagation(self, X, weights, biases):
    '''
    Computes the forward propagation of the input in the SMNN:

    Z{l+1} = W{l}*H{l} + B{l}
    H{l+1} = f(Z{l+1})

    where {l} and {l+1} denote layers,
    B is the bias matrix, columnwise repetition of the bias vector with the number of samples,
    Z is the output matrix of neurons before the activation function is applied,
    f(.) is the activation function,
    H is the output matrix of neurons after the activation function is applied (h{1}=X).

    Arguments
    X          : data matrix in the form [input dim., number of samples]
    weights    : list of weight matrices of each layer
    biases     : list of bias vectors of each layer

    Returns
    outputs    : list of output matrices (z) of each layer (output of neuron before activation function)
    activities : list of activation matrices (h) of each layer (output of neuron after activation function)

    Raises
    SystemExit : if the configured activation function is not the sigmoid;
                 the error message is written to stderr and the exit status
                 is non-zero
    '''
    assert self.isInitialized, 'ERROR:SMNN:doForwardPropagation: The instance is not properly initialized'

    # NOTE: weights and biases must always be given explicitly; falling back
    # to the parameters stored in the instance was deliberately disabled.
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:SMNN:doForwardPropagation: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:SMNN:doForwardPropagation: bias dimension does not match the network topology'

    outputs = []
    activities = []

    for layer in range(self.nLayers - 1):
        # The first layer reads the data, later layers read the previous activity
        x = X if layer == 0 else activities[layer - 1]

        # Repeat the bias column once per sample before adding it
        z = np.dot(weights[layer], x) + np.repeat(biases[layer], x.shape[1], 1)

        if self.activation_fun == SMNN_ACTIVATION_FUNCTIONS[SMNN_ACTFUN_SIGMOID]:
            h = AuxFunctions.sigmoid(z)
        else:
            # Should not be here. Passing the message to sys.exit reports it
            # on stderr and terminates with a failure status instead of 0.
            sys.exit('ERROR:SMNN:doForwardPropagation: Wrong activation function')

        outputs.append(z)
        activities.append(h)

    return [outputs, activities]
def doForwardPropagation(self, X, weights, biases):
    '''
    Propagates the input through the hidden (sparse autoencoder) layers and
    the final softmax model.

    Arguments
    X          : data matrix in the form [input dim., number of samples]
    weights    : list of weight matrices of each layer; the last entry is
                 handed to the softmax model
    biases     : list of bias vectors of each layer

    Returns
    activities : list with the first-layer activity of every hidden layer,
                 followed by the softmax output converted to probabilities
    '''
    assert self.isInitialized, 'ERROR:StackedAutoencoder:doForwardPropagation: The instance is not properly initialized'

    activities = []
    layerInput = X

    for idx, hiddenLayer in enumerate(self.hiddenLayers):
        # Take the layer's own parameters and overwrite the first-layer
        # entries with the ones supplied by the caller
        layerWeights = hiddenLayer.getWeights()
        layerBiases = hiddenLayer.getBiases()
        layerWeights[0] = weights[idx]
        layerBiases[0] = biases[idx]

        # Forward pass of this autoencoder; only the first-layer activity is used
        [_, layerActivities] = hiddenLayer.doForwardPropagation(layerInput, layerWeights, layerBiases)
        layerInput = layerActivities[0]
        activities.append(layerInput)

    scores = self.softmaxmodel.doForwardPropagation(layerInput, weights[-1])

    # Convert output to probabilities: shift by the maximum along axis 0,
    # exponentiate, then divide by the sum along axis 0
    shifted = AuxFunctions.doUnbalancedMatrixOperation(scores, np.amax(scores, 0), 'sub', axis=0)
    exponentials = np.exp(shifted)
    probabilities = AuxFunctions.doUnbalancedMatrixOperation(exponentials, np.sum(exponentials, 0), 'div', axis=0)

    activities.append(probabilities)

    return activities
def setParameters(self, W, b):
    '''
    Stores the given weights and biases as the parameters of the layer,
    after checking them against the layer's weight and bias prototypes.

    Arguments
    W : weights to set
    b : biases to set
    '''
    weightsOk = AuxFunctions.checkNetworkParameters([W], [self.weightPrototype])
    assert weightsOk, 'ERROR:ConvLayer:setParameters: weight dimension does not match the network topology'

    biasesOk = AuxFunctions.checkNetworkParameters([b], [self.biasPrototype])
    assert biasesOk, 'ERROR:ConvLayer:setParameters: bias dimension does not match the network topology'

    self.weights = W
    self.biases = b
def rollParameters(self, weights, biases):
    '''
    Converts the parameters in matrix form into a single vector.

    Arguments
    weights : list of weight matrix of each layer
    biases  : list of bias vector of each layer

    Returns
    params  : 1-D array holding, layer by layer, the flattened weights
              followed by the flattened biases
    '''
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:CNN:rollParameters: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:CNN:rollParameters: bias dimension does not match the network topology'

    # Start from an empty array so that the result is well defined even
    # when there are no layers
    chunks = [np.array([])]
    for idx in range(len(weights)):
        chunks.append(weights[idx].flatten())
        chunks.append(biases[idx].flatten())

    return np.hstack(chunks)
def doForwardPropagation(self, X, weights, biases):
    '''
    Computes the forward propagation of the input in the CNN.

    Arguments
    X           : input data; its third axis (X.shape[2]) is used as the
                  number of samples
    weights     : list of weight matrices of each layer, the last entry
                  belongs to the softmax (output) layer
    biases      : list of bias vectors of each layer, the last entry
                  belongs to the softmax (output) layer

    Returns
    activations : list holding the output of every convolutional layer (as
                  returned by the layer's own doForwardPropagation) followed
                  by the class probabilities of the softmax layer
    '''
    assert self.isInitialized, 'ERROR:CNN:doForwardPropagation: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:CNN:doForwardPropagation: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:CNN:doForwardPropagation: bias dimension does not match the network topology'

    activations = []

    # Input to the network
    indata = X

    # Propagate through the convolutional layers
    for i, layer in enumerate(self.layers):
        # Compute and save the activity of the current layer
        outdata = layer.doForwardPropagation(indata, weights[i], biases[i])
        activations.append(outdata)
        # The pooled activity of this layer is the input to the next one
        indata = outdata[INDEX_ACTIVATION_POOL]

    # Reshape the 4-D pooled activity into [features, samples] for the softmax layer
    indata = np.reshape(indata, [indata.shape[0] * indata.shape[1] * indata.shape[2], indata.shape[3]])

    # Softmax (output) layer
    z = np.dot(weights[-1], indata) + np.repeat(biases[-1], X.shape[2], 1)

    # Subtract the per-sample maximum before exponentiating. The softmax is
    # mathematically unchanged by this shift, but exp() can no longer
    # overflow to inf (which would turn the normalised output into NaN).
    z = z - np.amax(z, 0)
    h = np.exp(z)
    y = AuxFunctions.doUnbalancedMatrixOperation(h, np.sum(h, 0), 'div', axis=0)

    # Save the activity
    activations.append(y)

    return activations
def unrollParameters(self, params):
    '''
    Converts the vectorized parameters into lists of weight and bias arrays.

    Arguments
    params  : parameters to unroll; for every convolutional layer the
              weights are read first and the biases second, the softmax
              layer parameters come last

    Returns
    weights : list of weight arrays, one per convolutional layer plus the
              softmax layer
    biases  : list of bias arrays in the same order
    '''
    weights = []
    biases = []

    # Position of the next unread element of params
    cursor = 0

    # Convolutional layers
    for layer in self.layers:
        filterShape = (layer.filterDim[INDEX_X], layer.filterDim[INDEX_Y], layer.numFilters)
        nWeights = filterShape[0] * filterShape[1] * filterShape[2]
        weights.append(np.reshape(params[cursor:cursor + nWeights], filterShape))
        cursor += nWeights

        # One bias per filter, stored as a column
        nBiases = layer.numFilters
        biases.append(np.reshape(params[cursor:cursor + nBiases], (nBiases, 1)))
        cursor += nBiases

    # Softmax layer: shapes are taken from the currently stored parameters
    nWeights = np.size(self.weights)
    weights.append(np.reshape(params[cursor:cursor + nWeights], self.weights.shape))
    cursor += nWeights

    nBiases = len(self.biases)
    biases.append(np.reshape(params[cursor:cursor + nBiases], self.biases.shape))

    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:CNN:unrollParameters: dimensions of given parameters do not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:CNN:unrollParameters: dimensions of given parameters do not match the network topology'

    return weights, biases
def setNetworkParameters(self, weights, biases):
    '''
    Sets the parameters of the network from their stacked (list) form.

    Arguments
    weights : list of weights to set for each layer; the last entry is
              stored as the weights of the network itself
    biases  : list of biases to set for each layer; the last entry is
              stored as the biases of the network itself
    '''
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:CNN:setNetworkParameters: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:CNN:setNetworkParameters: bias dimension does not match the network topology'

    # Each layer performs its own size check
    for idx, layer in enumerate(self.layers):
        layer.setParameters(weights[idx], biases[idx])

    self.weights = weights[-1]
    self.biases = biases[-1]
def doForwardPropagation(self, X, weights, biases):
    '''
    Runs the input through the convolution and pooling steps of the layer.

    Arguments
    X          : input data, handed unchanged to convolve()
    weights    : weights of this layer (checked against the weight prototype)
    biases     : biases of this layer (checked against the bias prototype)

    Returns
    activities : two-element list with the activations from the convolution
                 and from the pooling step, respectively
    '''
    assert self.isInitialized, 'ERROR:ConvLayer:doForwardPropagation: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters([weights], [self.weightPrototype]), 'ERROR:ConvLayer:doForwardPropagation: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters([biases], [self.biasPrototype]), 'ERROR:ConvLayer:doForwardPropagation: bias dimension does not match the network topology'

    # Convolution first, then pooling on its result
    convolved = convolve(self.filterDim, self.numFilters, X, weights, biases)
    pooled = pool(self.poolDim, convolved, self.poolingFunction)

    return [convolved, pooled]
def computeCost(self, theta, X, y):
    '''
    Computes the value of the Softmax regression objective function for given
    parameters (theta), data matrix (X) and corresponding labels (y):

    f = -( Y * log( P(Y|X;theta) ) )

    where Y is ground truth matrix, a binary matrix where for each column
    (i.e. sample) the row corresponding to the true class is one and the
    rest is zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    Arguments
    theta : function parameters in the form (number of parameters * number of classes, )
    X     : data in the form [number of parameters, number of samples]
    y     : labels in the form [1, number of samples]

    Returns
    f     : computed cost (floating point number)
    '''
    assert self.isInitialized, 'ERROR:Softmax:computeCost: The instance is not properly initialized'
    assert X.shape[0] == self.nParams, 'ERROR:Softmax:computeCost: Dimensions of given data do not match with the number of parameters'

    epsilon = 1e-6

    theta = self.unrollParameters(theta)
    nSamples = X.shape[1]

    # Class scores, shifted by the per-sample maximum. The shift cancels in
    # the softmax ratio but keeps exp() from overflowing to inf, which would
    # otherwise make the probabilities (and the cost) NaN.
    scores = np.dot(theta, X)
    scores = scores - np.amax(scores, 0)
    expScores = np.exp(scores)

    # Normalise every column (sample) by its sum over the classes
    P = expScores / np.sum(expScores, 0)

    # Guard for log(0)
    if np.min(P) < epsilon:
        P = P + epsilon

    # [samples, classes] matrix of log-probabilities
    logP = np.transpose(np.log(P))

    # True where the column index equals the label of the sample (row)
    isTrueClass = np.arange(self.nClasses).reshape(1, self.nClasses) == np.reshape(y, [nSamples, 1])

    f = (-1.0 / nSamples) * np.sum(logP[isTrueClass])

    return f
def rollParameters(self, weights, biases):
    '''
    Flattens the per-layer weight matrices and bias vectors into one
    parameter vector.

    Arguments
    weights : list of weight matrices of each layer
    biases  : list of bias vectors of each layer

    Returns
    params  : parameter vector; for every layer the flattened weights are
              followed by the flattened biases
    '''
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:SparseAutoencoder:rollParameters: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:SparseAutoencoder:rollParameters: bias dimension does not match the network topology'

    params = np.array([])
    for layerIdx, layerWeights in enumerate(weights):
        flatPair = (layerWeights.flatten(), biases[layerIdx].flatten())
        params = np.hstack((params,) + flatPair)

    return params
def setBiases(self, biases_new):
    '''
    Replaces the biases stored in the model parameters of the network,
    leaving the weights as they are.

    Arguments
    biases_new : New biases to set
    '''
    assert self.isInitialized, 'ERROR:SparseAutoencoder:setBiases: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters(biases_new, self.biasPrototypes), 'ERROR:SparseAutoencoder:setBiases: bias dimension does not match the network topology'

    # Only the current weights are needed; the old biases are discarded
    [currentWeights, _] = self.unrollParameters(self.params)

    self.params = self.rollParameters(currentWeights, biases_new)
def convolve(filterDim, numFilters, X, W, b):
    '''
    Returns the convolution of the features given by W and b with the given
    data X.

    Arguments
    filterDim         : filter (feature) dimension
    numFilters        : number of feature maps
    X                 : input data in the form images(r, c, image number)
    W                 : weights i.e. features, is of shape (filterDim,filterDim,numFilters)
    b                 : biases, is of shape (numFilters,1)

    Returns
    convolvedFeatures : matrix of convolved features in the form
                        convolvedFeatures(imageRow, imageCol, featureNum, imageNum)
    '''
    numData = X.shape[2]

    # Size of a 'valid' convolution result
    convDimX = X.shape[INDEX_X] - filterDim[INDEX_X] + 1
    convDimY = X.shape[INDEX_Y] - filterDim[INDEX_Y] + 1

    convolvedFeatures = np.zeros([convDimX, convDimY, numFilters, numData])

    for imageNum in range(numData):
        image = X[:, :, imageNum]

        for filterNum in range(numFilters):
            # Rotate the feature by 180 degrees before handing it to
            # convolve2d, as the original implementation does
            kernel = np.rot90(W[:, :, filterNum], 2)

            response = scipy.signal.convolve2d(image, kernel, mode='valid')

            # Add the bias unit, then apply the sigmoid to get the hidden activation
            convolvedFeatures[:, :, filterNum, imageNum] = AuxFunctions.sigmoid(response + b[filterNum])

    return convolvedFeatures
def rollParameters(self, theta):
    '''
    Converts a given parameter matrix into a vector.

    Arguments
    theta : parameter matrix, checked against the weight prototype

    Returns
    theta : parameter vector (flattened copy of the matrix)
    '''
    # The message tag names this method; it previously pointed at
    # unrollParameters, which sent readers to the wrong place.
    assert self.isInitialized, 'ERROR:SoftICA:rollParameters: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters([theta], [self.weightPrototype]), 'ERROR:SoftICA:rollParameters: Weight dimension does not match the network topology'

    return theta.flatten()
def testGradient(self, X, y): ''' Tests the analytical gradient computation by comparing it with the numerical gradients Arguments X : data matrix the form [input dim., number of samples] y : labels in the form [1, number of samples]. Returns result : 0 if passed, -1 if failed ''' assert self.isInitialized, 'ERROR:DeepNetwork:testGradient: The instance is not properly initialized' if self.debug: print 'DEBUG:DeepNetwork:testGradient: Testing gradient computation...' result = 0 theta_list = self.getNetworkParameters() theta = self.unstackParameters(theta_list) grad = self.computeGradient(theta, X, y) numGrad = AuxFunctions.computeNumericalGradient(func=self.computeCost, params=theta, args=((X, y))) errorGrad = np.sqrt(np.sum((grad - numGrad)**2)) if errorGrad < 1e-4: if self.debug: print 'DEBUG:DeepNetwork:testGradient:Gradient error: ', errorGrad print 'DEBUG:DeepNetwork:testGradient:Gradient check PASSED!' print result = 0 else: if self.debug: print 'DEBUG:DeepNetwork:testGradient:Gradient error: ', errorGrad print 'DEBUG:DeepNetwork:testGradient:Gradient check FAILED!' print result = -1 return result
def l2rowscaled(self, x, alpha):
    '''
    Projects weights to the norm ball to prevent degenerate bases.

    Arguments
    x      : Weight matrix
    alpha  : Scale factor

    Returns
    y      : Weight matrix projected to norm ball
    '''
    normeps = 1e-5

    # Row norms (with a small offset under the square root), scaled by alpha
    rowNorms = np.sqrt(np.sum(x**2, 1) + normeps) * alpha

    return AuxFunctions.doUnbalancedMatrixOperation(x, rowNorms, 'div')
def testGradient(self, X, y): ''' Tests the analytical gradient computation by comparing it with the numerical gradients Arguments X : data matrix the form [number of parameters, number of samples] y : labels in the form [1, number of samples] Returns result : 0 if passed, -1 if failed ''' assert self.isInitialized, 'ERROR:Linreg:testGradient: The instance is not properly initialized' assert X.shape[ 0] == self.nParams, 'ERROR:Linreg:testGradient: Dimensions of given data do not match with the number of parameters' if self.debug: print 'DEBUG:Linreg:testGradient: Testing gradient computation... ' result = 0 grad = self.computeGradient(self.theta, X, y) numGrad = AuxFunctions.computeNumericalGradient(func=self.computeCost, params=self.theta, args=(X, y)) errorGrad = np.sqrt(np.sum((grad - numGrad)**2)) if errorGrad < 1e-4: if self.debug: print 'DEBUG:Linreg:testGradient: Gradient error: ', errorGrad print 'DEBUG:Linreg:testGradient: Gradient check PASSED!' print result = 0 else: if self.debug: print 'DEBUG:Linreg:testGradient: Gradient error: ', errorGrad print 'DEBUG:Linreg:testGradient: Gradient check FAILED!' print result = -1 return result
def testGradient(self, X, y): ''' Tests the analytical gradient computation by comparing it with the numerical gradients Arguments X : data matrix the form [input dim., number of samples] y : labels in the form [1, number of samples] Returns result : 0 if passed, -1 if failed ''' assert self.isInitialized, 'ERROR:CNN:testGradient: The instance is not properly initialized' if self.debug: print 'DEBUG:CNN:testGradient: Testing gradient computation...' result = 0; [weights, biases] = self.getNetworkParameters(); params = self.rollParameters(weights, biases); grad = self.computeGradient(params, X, y); numGrad = AuxFunctions.computeNumericalGradient( func=self.computeCost, params=params, args=((X, y)) ); errorGrad = np.sqrt(np.sum((grad - numGrad)**2)); if errorGrad<1e-4: if self.debug: print 'DEBUG:CNN:testGradient:Gradient error: ', errorGrad print 'DEBUG:CNN:testGradient:Gradient check PASSED!' print result = 0; else: if self.debug: print 'DEBUG:CNN:testGradient:Gradient error: ', errorGrad print 'DEBUG:CNN:testGradient:Gradient check FAILED!' print result = -1; return result
def unstackParameters(self, theta_list):
    '''
    Converts the model parameters from stacked (list) form into one vector.

    Arguments
    theta_list : list of model parameters of each layer

    Returns
    theta      : vector of combined model parameters of the network, i.e.
                 the flattened entries of theta_list joined in order
    '''
    assert self.isInitialized, 'ERROR:DeepNetwork:unstackParameters: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters(theta_list, self.modelParameterPrototype), 'ERROR:DeepNetwork:unstackParameters: model parameter dimension does not match the network topology'

    # The leading empty array keeps the result defined for an empty list
    flattened = [np.array([])]
    flattened.extend(layerTheta.flatten() for layerTheta in theta_list)

    return np.hstack(flattened)
def computeCost(self, theta, X, y):
    '''
    Computes the value of the CNN objective function for given parameters
    (theta), data (X) and corresponding labels (y):

    f = -( Y * log( P(Y|X;theta) ) )

    where Y is ground truth matrix, a binary matrix where for each column
    (i.e. sample) the row corresponding to the true class is one and the
    rest is zero, and P(Y|X;theta) is the output of the network's final
    layer, normalised over the classes.

    Arguments
    theta : network parameters in rolled (vector) form, as accepted by
            unrollParameters
    X     : input data; its third axis (X.shape[2]) is the number of samples
    y     : labels, one per sample (reshaped internally to [number of samples, 1])

    Returns
    f     : computed cost (floating point number)
    '''
    assert self.isInitialized, 'ERROR:CNN:computeCost: The instance is not properly initialized'

    nSamples = X.shape[2]

    [weights, biases] = self.unrollParameters(theta)

    activations = self.doForwardPropagation(X, weights, biases)

    # Normalise the output-layer activation over the classes (axis 0)
    output = activations[-1]
    P = output / np.sum(output, 0)

    # True where the column index equals the label of the sample (row)
    isTrueClass = np.arange(self.outputDim).reshape(1, self.outputDim) == np.reshape(y, [nSamples, 1])

    # Select the true-class probabilities BEFORE taking the logarithm.
    # Multiplying the full log matrix by a 0/1 mask yields -inf * 0 = NaN as
    # soon as any other class has probability 0.
    trueClassProb = np.transpose(P)[isTrueClass]

    f = (-1.0 / nSamples) * np.sum(np.log(trueClassProb))

    return f
def computeGradient(self, theta, X, y):
    '''
    Computes gradients of the Softmax regression objective function wrt
    parameters (theta) for a given data matrix (X) and corresponding labels (y):

    g = -( X * (Y - P(y|X;theta)) )

    where Y is ground truth matrix, a binary matrix where for each column
    (i.e. sample) the row corresponding to the true class is one and the
    rest is zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    Arguments
    theta : function parameters in rolled form, as accepted by unrollParameters
    X     : data in the form [number of parameters, number of samples]
    y     : labels in the form [1, number of samples]

    Returns
    g     : computed gradients of parameters array in the form
            (number of parameters*number of classes,)
    '''
    assert self.isInitialized, 'ERROR:Softmax:computeGradient: The instance is not properly initialized'
    assert X.shape[0] == self.nParams, 'ERROR:Softmax:computeGradient: Dimensions of given data do not match with the number of parameters'

    theta = self.unrollParameters(theta)
    nSamples = X.shape[1]

    # Class scores, shifted by the per-sample maximum. The shift cancels in
    # the softmax ratio but keeps exp() from overflowing to inf, which would
    # otherwise turn the probabilities (and the gradient) into NaN.
    scores = np.dot(theta, X)
    scores = scores - np.amax(scores, 0)
    expScores = np.exp(scores)

    # Normalise every column (sample) by its sum over the classes
    P = expScores / np.sum(expScores, 0)

    # Ground truth matrix Y in the form [classes, samples]
    Y = (np.arange(self.nClasses).reshape(self.nClasses, 1) == np.reshape(y, [1, nSamples])).astype(int)

    # (Y - P) * X' has the form [classes, parameters], same layout as theta
    g = (-1.0 / nSamples) * np.dot(Y - P, np.transpose(X))

    return g.flatten()
def __init__(self, sizeLayers, lambd=0.99, epsilon=1e-2, debug=0): ''' Initialization function of the SoftICA class Arguments sizeLayers : Size of the layers, must be in the form [Input dimensions, hidden layer dimensions, output layer dimensions] where output layer dimensions = input layer dimensions lambd : Sparsity cost, default is 0.99 epsilon : L1-regularisation epsilon |Wx| ~ sqrt((Wx).^2 + epsilon), default is 1e-2 debug : Debugging flag ''' self.isInitialized = False self.debug = debug self.inputDim = sizeLayers[0] self.featureDim = sizeLayers[1] self.lambd = lambd self.epsilon = epsilon assert self.inputDim > 0, 'ERROR:SoftICA:init: Input size must be >0' assert self.featureDim > 0, 'ERROR:SoftICA:init: Feature size must be >0' weights = np.random.rand(self.featureDim, self.inputDim) * 0.01 weights = AuxFunctions.doUnbalancedMatrixOperation( weights, np.sqrt(np.sum(weights**2, 1)), 'div') self.params = weights.flatten() self.weightPrototype = (self.featureDim, self.inputDim) if debug: print 'DEBUG:SoftICA:init: initialized for inputDim: ', self.inputDim print 'DEBUG:SoftICA:init: initialized for featureDim: ', self.featureDim print 'DEBUG:SoftICA:init: initialized for lambda: ', self.lambd print 'DEBUG:SoftICA:init: initialized for epsilon: ', self.epsilon print self.isInitialized = True
def stackParameters(self, theta):
    '''
    Converts the model parameters from vector form into stacked (list) form.

    Arguments
    theta      : vector of combined model parameters of the network

    Returns
    theta_list : list of model parameters of each layer; consecutive slices
                 of theta whose lengths are the entries of
                 self.modelParameterPrototype
    '''
    assert self.isInitialized, 'ERROR:DeepNetwork:stackParameters: The instance is not properly initialized'

    theta_list = []
    offset = 0
    for size in self.modelParameterPrototype:
        theta_list.append(theta[offset:offset + size])
        offset += size

    assert AuxFunctions.checkNetworkParameters(theta_list, self.modelParameterPrototype), 'ERROR:DeepNetwork:stackParameters: model parameter dimension does not match the network topology'

    return theta_list
def computeGradient(self, theta, X, y):
    '''
    Computes gradients of the CNN objective function for given parameters,
    data and corresponding labels using the back propagation.

    First, the error of the output (Softmax) layer is computed:

    E_out = (Y - P(y|X;theta))

    where Y is ground truth matrix, a binary matrix where for each column
    (i.e. sample) the row corresponding to the true class is one and the
    rest is zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    The output error is then back propagated to the convolutional layer:

    error_conv = W_out' * E_out

    And this error is further propagated within the convolutional layers.
    Gradients are computed:

    dJ(W,b;X,y)/dW_{l} = E_{l+1} * H_{l}'
    dJ(W,b;X,y)/db_{l} = sum(E_{l+1})

    where sum(.) is taken columnwise i.e. over samples

    Arguments
    theta : network parameters in rolled (vector) form, as accepted by
            unrollParameters
    X     : input data; its third axis (X.shape[2]) is the number of samples
    y     : labels, one per sample

    Returns
    grad  : gradients of weights and biases in rolled form
    '''
    assert self.isInitialized, 'ERROR:CNN:computeGradient: The instance is not properly initialized'

    gradients_W = []
    gradients_b = []

    nSamples = X.shape[2]
    nLayers = len(self.layers)

    [weights, biases] = self.unrollParameters(theta)

    activations = self.doForwardPropagation(X, weights, biases)

    # Error of the output layer
    P = AuxFunctions.doUnbalancedMatrixOperation(activations[-1], np.sum(activations[-1], 0), 'div', axis=0)
    aux4 = np.repeat(np.reshape(range(self.outputDim), [1, self.outputDim]), nSamples, 0)
    aux5 = np.repeat(np.reshape(y, [nSamples, 1]), self.outputDim, 1)
    aux6 = aux4 == aux5

    error_out = (-1.0 / nSamples) * (np.transpose(aux6.astype(int)) - P)

    # Gradient of the output layer; its input is the pooled activity of the
    # last convolutional layer reshaped to [features, samples]
    act = activations[-2][INDEX_ACTIVATION_POOL]
    act = np.reshape(act, [act.shape[0] * act.shape[1] * act.shape[2], act.shape[3]])
    W_grad = np.dot(error_out, np.transpose(act))
    b_grad = np.dot(error_out, np.ones([nSamples, 1]))

    gradients_W.append(W_grad)
    gradients_b.append(b_grad)

    # Propagation of error_out to the last pooling layer
    error_pool = np.reshape(
        np.dot(np.transpose(weights[-1]), error_out),
        [self.layers[-1].outputDim[INDEX_X],
         self.layers[-1].outputDim[INDEX_Y],
         self.layers[-1].numFilters,
         nSamples])

    # Back propagation of error through the layers, last layer first
    error = error_pool
    for layer_idx in reversed(range(nLayers)):
        # Layer input: the data for the first layer, otherwise the pooled
        # activity of the preceding layer
        if layer_idx == 0:
            layer_in = X
        else:
            layer_in = activations[layer_idx - 1][INDEX_ACTIVATION_POOL]

        # Layer output
        layer_out = activations[layer_idx]

        # Backpropagate error. The layer must be given ITS OWN weights:
        # indexing the weights with the loop counter instead of the layer
        # index pairs layers and weights in opposite order and is only
        # correct for a single convolutional layer.
        [error_bp, W_grad, b_grad] = self.layers[layer_idx].backPropagateError(
            error, layer_in, layer_out, weights[layer_idx])

        # Save gradients
        gradients_W.append(W_grad)
        gradients_b.append(b_grad)

        # Set error for the next (previous) layer
        error = error_bp

    # Gradients were collected from the output towards the input; reverse
    # them so they follow the layer order expected by rollParameters
    gradients_W = list(reversed(gradients_W))
    gradients_b = list(reversed(gradients_b))

    return self.rollParameters(gradients_W, gradients_b)
def testGradient(self, X): ''' Tests the analytical gradient computation by comparing it with the numerical gradients Arguments X : data matrix the form [input dim., number of samples] Returns result : 0 if passed, -1 if failed ''' assert self.isInitialized, 'ERROR:SparseCoding:testGradient: The instance is not properly initialized' if self.debug: print 'DEBUG:SparseCoding:testGradient:Checking weight gradient...' result = 0 grad = self.computeWeightGradient(self.weights_vec, self.features_vec, X) numGrad = AuxFunctions.computeNumericalGradient( func=self.computeWeightCost, params=self.weights_vec, args=(self.features_vec, X)) errorGrad = np.sqrt(np.sum((grad - numGrad)**2)) if errorGrad < 1e-4: if self.debug: print 'DEBUG:SparseCoding:testGradient:Gradient error: ', errorGrad print 'DEBUG:SparseCoding:testGradient:Gradient check PASSED!' print else: if self.debug: print 'DEBUG:SparseCoding:testGradient:Gradient error: ', errorGrad print 'DEBUG:SparseCoding:testGradient:Gradient check FAILED!' print result = -1 if self.debug: print 'DEBUG:SparseCoding:testGradient:Checking feature gradient...' grad = self.computeFeatureGradient(self.features_vec, self.weights_vec, X) numGrad = AuxFunctions.computeNumericalGradient( func=self.computeFeatureCost, params=self.features_vec, args=(self.weights_vec, X)) errorGrad = np.sqrt(np.sum((grad - numGrad)**2)) if errorGrad < 1e-4: if self.debug: print 'DEBUG:SparseCoding:testGradient:Gradient error: ', errorGrad print 'DEBUG:SparseCoding:testGradient:Gradient check PASSED!' print else: if self.debug: print 'DEBUG:SparseCoding:testGradient:Gradient error: ', errorGrad print 'DEBUG:SparseCoding:testGradient:Gradient check FAILED!' print result = -1 return result