def l2rowscaledg(self, x, y, outderv, alpha):
    '''
    Back-projects weight gradients from the norm ball to their original space

    Arguments
    x        : Old weight matrix
    y        : Weight matrix projected to the norm ball
    outderv  : Gradients projected to the norm ball
    alpha    : Scale factor

    Returns
    Weight gradient matrix back-projected to the original weight space
    '''
    normeps = 1e-5
    epssumsq = np.sum(x**2, 1) + normeps
    l2rows = np.sqrt(epssumsq) * alpha

    # Recompute the projected weights if they were not provided
    if len(y) == 0:
        y = AuxFunctions.doUnbalancedMatrixOperation(x, l2rows, 'div')

    # Chain rule for the row-wise normalization x -> x / (alpha * ||x||)
    aux1 = AuxFunctions.doUnbalancedMatrixOperation(outderv, l2rows, 'div')
    aux2 = AuxFunctions.doUnbalancedMatrixOperation(y, (np.sum(outderv * x, 1) / epssumsq), 'mul')

    return aux1 - aux2
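# The AuxFunctions.doUnbalancedMatrixOperation helper used above (and throughout
# this section) is not shown here. The sketch below is only an assumption of its
# behaviour, inferred from the call sites: it applies an elementwise operation
# between a matrix and a vector, where with the default axis the vector holds one
# value per row and with axis=0 one value per column. The name and signature are
# illustrative, not the actual AuxFunctions implementation.
def _unbalanced_matrix_op_sketch(M, v, op, axis=1):
    import numpy as np
    # Reshape the vector so that it broadcasts along the requested axis of M
    v = np.asarray(v).reshape(-1, 1) if axis == 1 else np.asarray(v).reshape(1, -1)
    ops = {'add': np.add, 'sub': np.subtract, 'mul': np.multiply, 'div': np.divide}
    return ops[op](M, v)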
def doForwardPropagation_asLayer(self, X, theta):
    '''
    Wrapper around doForwardPropagation for cases where the Softmax model is
    used as a layer of a deep network.

    Arguments
    X      : data matrix in the form [input dim., number of samples]
    theta  : model parameters of the layer, packed as [weights+biases]

    Returns
    activation : activation of the layer, converted to class probabilities
    '''
    assert self.isInitialized, 'ERROR:Softmax:doForwardPropagation_asLayer: The instance is not properly initialized'
    assert X.shape[0]==self.nParams, 'ERROR:Softmax:doForwardPropagation_asLayer: Dimensions of given data do not match with the number of parameters'
    assert np.size(theta)==self.thetaMatrixPrototype[0]*self.thetaMatrixPrototype[1], 'ERROR:Softmax:doForwardPropagation_asLayer: Dimensions of given weights do not match the internal structure'

    weights = self.unrollParameters(theta)

    activation = self.doForwardPropagation(X, weights)

    # Convert the output to probabilities:
    # subtract the column-wise maximum for numerical stability, ...
    aux2 = AuxFunctions.doUnbalancedMatrixOperation(activation, np.amax(activation, 0), 'sub', axis=0)
    aux3 = np.exp(aux2)
    # ... then normalize each column by its sum
    y = AuxFunctions.doUnbalancedMatrixOperation(aux3, np.sum(aux3, 0), 'div', axis=0)

    return y
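# The max-subtraction performed above is the standard trick for a numerically
# stable softmax: shifting all logits by the column maximum leaves the
# probabilities unchanged but avoids overflow in np.exp. A minimal standalone
# sketch, independent of AuxFunctions (the name is illustrative only):
def _softmax_columns_sketch(Z):
    import numpy as np
    Z = Z - np.amax(Z, axis=0, keepdims=True)       # largest logit per column becomes 0
    expZ = np.exp(Z)                                # no overflow for large logits
    return expZ / np.sum(expZ, axis=0, keepdims=True)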
def doForwardPropagation(self, X, weights, biases):
    '''
    Computes the forward propagation of the input in the network.

    Arguments
    X        : data matrix in the form [input dim., number of samples]
    weights  : list of weight matrices of each layer
    biases   : list of bias vectors of each layer

    Returns
    activities : list of activation matrices (h) of each layer (output of the neurons after the activation function)
    '''
    assert self.isInitialized, 'ERROR:StackedAutoencoder:doForwardPropagation: The instance is not properly initialized'

    activities = []
    indata = X
    for i in range(len(self.hiddenLayers)):
        # Get the original weights and biases of the sparse autoencoder layer
        W_sa = self.hiddenLayers[i].getWeights()
        b_sa = self.hiddenLayers[i].getBiases()
        # Replace the first layer's weights and biases with the current ones
        W_sa[0] = weights[i]
        b_sa[0] = biases[i]
        # Do the forward propagation with the new weights
        [outputs_sa, activities_sa] = self.hiddenLayers[i].doForwardPropagation(indata, W_sa, b_sa)
        # Keep the activity of the first layer and feed it to the next layer
        activity = activities_sa[0]
        activities.append(activity)
        indata = activity

    outdata = self.softmaxmodel.doForwardPropagation(indata, weights[-1])
    #outdata = np.dot(weights[-1], indata)

    # Convert the output to probabilities:
    # subtract the column-wise maximum for numerical stability, ...
    aux2 = AuxFunctions.doUnbalancedMatrixOperation(outdata, np.amax(outdata, 0), 'sub', axis=0)
    aux3 = np.exp(aux2)
    # ... then normalize each column by its sum
    y = AuxFunctions.doUnbalancedMatrixOperation(aux3, np.sum(aux3, 0), 'div', axis=0)

    activities.append(y)

    return activities
def computeCost(self, theta, X, y):
    '''
    Computes the value of the Softmax regression objective function for given
    parameters (theta), data matrix (X) and corresponding labels (y):

    f = -( Y * log( P(Y|X;theta) ) )

    where Y is the ground truth matrix, a binary matrix in which, for each column
    (i.e. sample), the row corresponding to the true class is one and the rest are zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    Arguments
    theta  : function parameters in the form (number of parameters * number of classes, )
    X      : data in the form [number of parameters, number of samples]
    y      : labels in the form [1, number of samples]

    Returns
    f      : computed cost (floating point number)
    '''
    assert self.isInitialized, 'ERROR:Softmax:computeCost: The instance is not properly initialized'
    assert X.shape[0]==self.nParams, 'ERROR:Softmax:computeCost: Dimensions of given data do not match with the number of parameters'

    epsilon = 1e-6

    theta = self.unrollParameters(theta)

    f = 0
    nSamples = X.shape[1]

    # Class probabilities P(Y|X;theta)
    aux1 = np.exp(np.dot(theta, X))
    P = AuxFunctions.doUnbalancedMatrixOperation(aux1, np.sum(aux1, 0), 'div', axis=0)

    # Guard against log(0)
    if np.min(P) < epsilon:
        P = P + epsilon

    aux3 = np.transpose(np.log(P))
    #aux3 = np.transpose(np.log(P.clip(min=epsilon)))

    # Select the log-probability of the true class of each sample
    aux4 = np.repeat(np.reshape(range(self.nClasses), [1, self.nClasses]), nSamples, 0)
    aux5 = np.repeat(np.reshape(y, [nSamples, 1]), self.nClasses, 1)

    f = (-1.0/nSamples) * np.sum(aux3[aux4==aux5])

    return f
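# The aux4 == aux5 indexing above picks log P of the true class of every sample.
# An equivalent, perhaps more readable, formulation builds the one-hot ground
# truth matrix Y explicitly, mirroring f = -(1/N) * sum(Y * log P). Sketch only;
# the helper name is illustrative, P is assumed to be [number of classes,
# number of samples], and y holds integer class labels.
def _softmax_cost_sketch(P, y, nClasses):
    import numpy as np
    nSamples = P.shape[1]
    Y = np.zeros((nClasses, nSamples))
    Y[np.asarray(y, dtype=int).flatten(), np.arange(nSamples)] = 1.0   # one-hot columns
    return (-1.0 / nSamples) * np.sum(Y * np.log(P))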
def doForwardPropagation(self, X, weights, biases):
    '''
    Computes the forward propagation of the input in the CNN.

    Arguments
    X        : data matrix in the form [image dim. 1, image dim. 2, number of samples]
    weights  : list of weight matrices of each layer
    biases   : list of bias vectors of each layer

    Returns
    activations : list of activation matrices (h) of each layer (output of the neurons after the activation function)
    '''
    assert self.isInitialized, 'ERROR:CNN:doForwardPropagation: The instance is not properly initialized'
    assert AuxFunctions.checkNetworkParameters(weights, self.weightPrototypes), 'ERROR:CNN:doForwardPropagation: weight dimension does not match the network topology'
    assert AuxFunctions.checkNetworkParameters(biases, self.biasPrototypes), 'ERROR:CNN:doForwardPropagation: bias dimension does not match the network topology'

    activations = []

    # Input to the network
    indata = X
    # Propagate through the convolutional layers
    for i in range(len(self.layers)):
        # Compute the activity of the current layer
        outdata = self.layers[i].doForwardPropagation(indata, weights[i], biases[i])
        # Save the activity of the current layer
        activations.append(outdata)
        # Set the (pooled) activity of the current layer as the input to the next layer
        indata = outdata[INDEX_ACTIVATION_POOL]

    # Compute the activity of the softmax (output) layer
    # Reshape the pooled feature maps into one feature vector per sample
    indata = np.reshape(indata, [indata.shape[0]*indata.shape[1]*indata.shape[2], indata.shape[3]])
    # Compute the activity
    #outdata = self.softmaxmodel.predict(indata)
    z = np.dot(weights[-1], indata) + np.repeat(biases[-1], X.shape[2], 1)
    h = np.exp(z)
    y = AuxFunctions.doUnbalancedMatrixOperation(h, np.sum(h, 0), 'div', axis=0)

    # Save the activity
    activations.append(y)

    return activations
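# The reshape above flattens the pooled feature maps of the last convolutional
# layer from [pooled rows, pooled cols, num. filters, num. samples] into a 2D
# matrix [pooled rows * pooled cols * num. filters, num. samples], so that the
# softmax layer sees one feature vector per sample. A minimal shape check with
# illustrative sizes (not taken from the model):
def _pool_flatten_shape_check():
    import numpy as np
    pooled = np.zeros([5, 5, 8, 100])               # e.g. 8 pooled 5x5 maps for 100 samples
    flat = np.reshape(pooled, [5*5*8, 100])         # one 200-dim feature vector per sample
    assert flat.shape == (200, 100)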
def l2rowscaled(self, x, alpha):
    '''
    Project weights to norm ball to prevent degenerate bases

    Arguments
    x      : Weight matrix
    alpha  : Scale factor

    Returns
    y      : Weight matrix projected to norm ball
    '''
    normeps = 1e-5
    epssumsq = np.sum(x**2, 1) + normeps

    l2rows = np.sqrt(epssumsq) * alpha
    y = AuxFunctions.doUnbalancedMatrixOperation(x, l2rows, 'div')

    return y
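# Usage sketch for the projection above: with alpha = 1 every row of the
# projected weight matrix ends up with (approximately) unit norm. Hedged:
# 'project' stands for a bound l2rowscaled method of whichever class this
# belongs to, and the matrix size is illustrative.
def _check_l2rowscaled(project):
    import numpy as np
    W = np.random.randn(25, 64)
    W_proj = project(W, 1.0)
    row_norms = np.sqrt(np.sum(W_proj**2, 1))
    assert np.allclose(row_norms, 1.0)              # normeps keeps them marginally below 1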
def computeCost(self, theta, X, y):
    '''
    Computes the value of the CNN objective function for given parameters
    (theta), data matrix (X) and corresponding labels (y):

    f = -( Y * log( P(Y|X;theta) ) )

    where Y is the ground truth matrix, a binary matrix in which, for each column
    (i.e. sample), the row corresponding to the true class is one and the rest are zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    Arguments
    theta  : function parameters in the form (number of parameters * number of classes, )
    X      : data matrix in the form [image dim. 1, image dim. 2, number of samples]
    y      : labels in the form [1, number of samples]

    Returns
    f      : computed cost (floating point number)
    '''
    assert self.isInitialized, 'ERROR:CNN:computeCost: The instance is not properly initialized'

    f = 0
    nSamples = X.shape[2]

    [weights, biases] = self.unrollParameters(theta)

    activations = self.doForwardPropagation(X, weights, biases)

    # Normalize the output-layer activity to probabilities
    P = AuxFunctions.doUnbalancedMatrixOperation(activations[-1], np.sum(activations[-1], 0), 'div', axis=0)

    aux3 = np.transpose(np.log(P))
    # Ground truth indicator: aux6[sample, class] is one for the true class of each sample
    aux4 = np.repeat(np.reshape(range(self.outputDim), [1, self.outputDim]), nSamples, 0)
    aux5 = np.repeat(np.reshape(y, [nSamples, 1]), self.outputDim, 1)
    aux6 = aux4==aux5

    f = (-1.0/nSamples) * np.sum(aux3 * aux6.astype(int))

    return f
def computeGradient(self, theta, X, y):
    '''
    Computes the gradients of the Softmax regression objective function wrt
    parameters (theta) for a given data matrix (X) and corresponding labels (y):

    g = -( X * (Y - P(y|X;theta)) )

    where Y is the ground truth matrix, a binary matrix in which, for each column
    (i.e. sample), the row corresponding to the true class is one and the rest are zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    Arguments
    theta  : function parameters in the form [number of parameters, 1]
    X      : data in the form [number of parameters, number of samples]
    y      : labels in the form [1, number of samples]

    Returns
    g      : computed gradients of parameters, array in the form (number of parameters*number of classes,)
    '''
    assert self.isInitialized, 'ERROR:Softmax:computeGradient: The instance is not properly initialized'
    assert X.shape[0]==self.nParams, 'ERROR:Softmax:computeGradient: Dimensions of given data do not match with the number of parameters'

    theta = self.unrollParameters(theta)

    g = np.zeros(np.shape(theta))
    nSamples = X.shape[1]

    # Class probabilities P(Y|X;theta)
    aux1 = np.exp(np.dot(theta, X))
    P = AuxFunctions.doUnbalancedMatrixOperation(aux1, np.sum(aux1, 0), 'div', axis=0)

    # Ground truth indicator: aux6[sample, class] is one for the true class of each sample
    aux4 = np.repeat(np.reshape(range(self.nClasses), [1, self.nClasses]), nSamples, 0)
    aux5 = np.repeat(np.reshape(y, [nSamples, 1]), self.nClasses, 1)
    aux6 = aux4==aux5

    g = (-1.0/nSamples) * np.transpose(np.dot(X, np.transpose(np.transpose(aux6.astype(int)) - P)))

    return g.flatten()
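# A quick way to validate computeGradient against computeCost is a central
# finite-difference check. Sketch only: 'model' is assumed to be a Softmax
# instance exposing the two methods above, theta is the flattened parameter
# vector, and the expected tolerance is illustrative.
def _check_softmax_gradient(model, theta, X, y, eps=1e-4):
    import numpy as np
    g_analytic = model.computeGradient(theta, X, y)
    g_numeric = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = eps
        g_numeric[i] = (model.computeCost(theta + e, X, y) -
                        model.computeCost(theta - e, X, y)) / (2.0 * eps)
    # The maximum elementwise difference should be small, e.g. < 1e-6
    return np.max(np.abs(g_analytic - g_numeric))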
def __init__(self, sizeLayers, lambd=0.99, epsilon=1e-2, debug=0):
    '''
    Initialization function of the SoftICA class

    Arguments
    sizeLayers  : Sizes of the layers, must be in the form
                  [input layer dimensions, hidden layer dimensions, output layer dimensions]
                  where output layer dimensions = input layer dimensions
    lambd       : Sparsity cost, default is 0.99
    epsilon     : L1-regularization epsilon, |Wx| ~ sqrt((Wx)^2 + epsilon), default is 1e-2
    debug       : Debugging flag
    '''
    self.isInitialized = False

    self.debug = debug
    self.inputDim = sizeLayers[0]
    self.featureDim = sizeLayers[1]
    self.lambd = lambd
    self.epsilon = epsilon

    assert self.inputDim > 0, 'ERROR:SoftICA:init: Input size must be >0'
    assert self.featureDim > 0, 'ERROR:SoftICA:init: Feature size must be >0'

    # Initialize the weights randomly and normalize each row to unit norm
    weights = np.random.rand(self.featureDim, self.inputDim) * 0.01
    weights = AuxFunctions.doUnbalancedMatrixOperation(weights, np.sqrt(np.sum(weights**2, 1)), 'div')

    self.params = weights.flatten()

    self.weightPrototype = (self.featureDim, self.inputDim)

    if debug:
        print('DEBUG:SoftICA:init: initialized for inputDim: ', self.inputDim)
        print('DEBUG:SoftICA:init: initialized for featureDim: ', self.featureDim)
        print('DEBUG:SoftICA:init: initialized for lambda: ', self.lambd)
        print('DEBUG:SoftICA:init: initialized for epsilon: ', self.epsilon)
        print()

    self.isInitialized = True
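# Minimal usage sketch for the constructor above. The layer sizes are
# illustrative (8x8 patches, i.e. 64 inputs, with 49 features); the output
# dimension must equal the input dimension, and the module name in the import
# is hypothetical.
#
#   from SoftICA import SoftICA
#   sica = SoftICA([64, 49, 64], lambd=0.99, epsilon=1e-2, debug=1)
#   print(sica.params.shape)    # (49 * 64,) flattened, row-normalized initial weights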
def computeGradient(self, theta, X, y):
    '''
    Computes the gradients of the CNN objective function for given parameters,
    data and corresponding labels using back propagation.

    First, the error of the output (Softmax) layer is computed:

    E_out = (Y - P(y|X;theta))

    where Y is the ground truth matrix, a binary matrix in which, for each column
    (i.e. sample), the row corresponding to the true class is one and the rest are zero

    P(Y|X;theta) = exp(theta'*X)/sum_j(exp(theta_j'*X)), j = 1 to number of classes

    The output error is then back-propagated to the last convolutional layer:

    error_conv = W_out' * E_out

    and further propagated within the convolutional layers. Gradients are computed as:

    dJ(W,b;X,y)/dW_{l} = E_{l+1} * H_{l}'
    dJ(W,b;X,y)/db_{l} = sum(E_{l+1})

    where sum(.) is taken columnwise, i.e. over samples.

    Arguments
    theta  : function parameters in the form (feature dim * input dim, )
    X      : data matrix in the form [image dim. 1, image dim. 2, number of samples]
    y      : labels in the form [1, number of samples]

    Returns
    grad   : gradients of weights and biases in rolled form
    '''
    assert self.isInitialized, 'ERROR:CNN:computeGradient: The instance is not properly initialized'

    gradients_W = []
    gradients_b = []
    nSamples = X.shape[2]

    [weights, biases] = self.unrollParameters(theta)

    activations = self.doForwardPropagation(X, weights, biases)

    # Error of the output layer
    P = AuxFunctions.doUnbalancedMatrixOperation(activations[-1], np.sum(activations[-1], 0), 'div', axis=0)
    aux4 = np.repeat(np.reshape(range(self.outputDim), [1, self.outputDim]), nSamples, 0)
    aux5 = np.repeat(np.reshape(y, [nSamples, 1]), self.outputDim, 1)
    aux6 = aux4==aux5
    error_out = (-1.0/nSamples) * (np.transpose(aux6.astype(int)) - P)

    # Gradients of the output layer: dJ/dW = E_out * H', dJ/db = sum of E_out over samples
    act = activations[-2][INDEX_ACTIVATION_POOL]
    act = np.reshape(act, [act.shape[0]*act.shape[1]*act.shape[2], act.shape[3]])
    W_grad = np.dot(error_out, np.transpose(act))
    b_grad = np.dot(error_out, np.ones([nSamples, 1]))

    gradients_W.append(W_grad)
    gradients_b.append(b_grad)

    # Propagation of error_out to the last pooling layer
    error_pool = np.reshape((np.dot(np.transpose(weights[-1]), error_out)), [self.layers[-1].outputDim[INDEX_X], self.layers[-1].outputDim[INDEX_Y], self.layers[-1].numFilters, nSamples])

    # Back propagation of the error through the convolutional layers (last to first)
    error = error_pool
    for i in range(len(self.layers)):
        # Index of the layer currently being processed
        layerIdx = len(self.layers)-1-i
        # Layer input
        if layerIdx==0:
            layer_in = X
        else:
            layer_in = activations[layerIdx-1][INDEX_ACTIVATION_POOL]
        # Layer output
        layer_out = activations[layerIdx]
        # Backpropagate the error using the weights of the current layer
        [error_bp, W_grad, b_grad] = self.layers[layerIdx].backPropagateError(error, layer_in, layer_out, weights[layerIdx])
        # Save gradients
        gradients_W.append(W_grad)
        gradients_b.append(b_grad)
        # Set the error for the next (previous) layer
        error = error_bp

    # Reverse the gradient lists so that they run from the first to the last layer
    gradients_W = list(reversed(gradients_W))
    gradients_b = list(reversed(gradients_b))

    return self.rollParameters(gradients_W, gradients_b)
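# For the fully connected softmax layer above, the weight and bias gradients
# take the usual outer-product forms dJ/dW = E_out * H' and dJ/db = E_out * 1.
# A toy shape check with illustrative sizes (not taken from the model):
def _softmax_layer_gradient_shapes():
    import numpy as np
    nClasses, nFeatures, nSamples = 10, 200, 32
    error_out = np.random.randn(nClasses, nSamples)       # output-layer error
    act = np.random.randn(nFeatures, nSamples)            # flattened pooled activations
    W_grad = np.dot(error_out, np.transpose(act))         # (nClasses, nFeatures)
    b_grad = np.dot(error_out, np.ones([nSamples, 1]))    # (nClasses, 1)
    assert W_grad.shape == (10, 200) and b_grad.shape == (10, 1)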