def forward(self, inputVal):
    # n_inputs is assumed to be available in the enclosing scope (e.g. a module-level
    # constant mirroring the network's input size)
    assert (n_inputs == len(inputVal)), 'Input Size Mismatch'
    # append the bias unit
    inputVal = np.array(inputVal)
    inputVal = np.append(inputVal, 1)
    assert (self.nhid > 0), 'At least one hidden layer is required'
    # computation for the first hidden layer
    self.netUnits[0][:, 0] = np.dot(self.inW, inputVal)
    self.netUnits[0][:, 1] = activations.activation(self.netUnits[0][:, 0], self.actfn)
    # computation for the rest of the hidden layers
    if self.nhid > 1:
        for i in range(1, self.nhid):
            tempInp = np.copy(self.netUnits[i - 1][:, 1])
            tempInp = np.append(tempInp, 1)   # bias unit
            self.netUnits[i][:, 0] = np.dot(self.hidW[i - 1], tempInp)
            self.netUnits[i][:, 1] = activations.activation(self.netUnits[i][:, 0], self.actfn)
    # computation for the output layer (softmax)
    tempInp = np.copy(self.netUnits[self.nhid - 1][:, 1])
    tempInp = np.append(tempInp, 1)           # bias unit
    self.outNet = np.dot(self.outW, tempInp)
    # shifting the logits by their maximum leaves the softmax unchanged but avoids overflow
    expNet = np.exp(self.outNet - np.max(self.outNet))
    self.outProb = expNet / np.sum(expNet)
def error_minimization(W, b, zeta, a, prev_layer, activation_func, den_activation,
                       y, w=None, d=None, y_pred=None):
    dW = {}
    dB = {}
    delta = {}
    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
        y = cp.reshape(y, (y.shape[0], batch_size))
    # w and d are the weights and deltas of the following layer; when both are
    # None this is the output layer of the network.
    is_last_layer = (w is None) and (d is None)
    if is_last_layer:
        # output layer: delta = a - y (softmax output with cross-entropy loss)
        delta['s'] = cp.subtract(a['s'], y)
        dB['s'] = (1 / batch_size) * cp.sum(delta['s'], axis=1)
        dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))
        delta['s'] = cp.reshape(delta['s'], (delta['s'].shape[0], 1, delta['s'].shape[1]))
        dW['s'] = (1 / batch_size) * cp.einsum('nik,kjn->nij', delta['s'], a['d'].T)
    else:
        # hidden layer: backpropagate the delta of the following layer
        w = cp.array(w)
        deltaW = cp.einsum('nik,kij->nj', w.T, d)
        deltaW = cp.reshape(deltaW, (deltaW.shape[0], 1, deltaW.shape[1]))
        a_der = activation(str(activation_func) + '_der', zeta['s'])
        delta['s'] = cp.multiply(deltaW, a_der)
        dB['s'] = (1 / batch_size) * cp.sum(delta['s'].squeeze(), axis=1)
        dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))
        dW['s'] = (1 / batch_size) * cp.einsum('nik,kjn->nij', delta['s'], a['d'].T)
    # gradients for the 'd' sub-layer, computed for both cases above
    deltaW = cp.einsum('nik,kij->knj', W['s'].T, delta['s'])
    a_der = activation(den_activation + '_der', zeta['d'])
    delta['d'] = cp.multiply(deltaW, a_der)
    dB['d'] = (1 / batch_size) * cp.sum(delta['d'], axis=2)
    dB['d'] = cp.reshape(dB['d'], (dB['d'].shape[0], dB['d'].shape[1], 1))
    dW['d'] = (1 / batch_size) * cp.dot(delta['d'], prev_layer.T)
    return [dW, dB, delta]
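# The einsum patterns in error_minimization can be hard to read.  A minimal numpy
# sketch of what 'nik,kjn->nij' computes: a per-unit outer product accumulated over
# the batch.  The shapes below are illustrative assumptions, not the actual layer sizes.
import numpy as np

n, k, batch = 4, 3, 5                            # units, inputs per unit, batch size
delta = np.random.randn(n, 1, batch)             # like delta['s'] after the reshape
a_d = np.random.randn(n, k, batch)               # like a['d']

fast = np.einsum('nik,kjn->nij', delta, a_d.T)   # what error_minimization does
slow = np.zeros((n, 1, k))
for u in range(n):                               # one outer product per unit ...
    for b in range(batch):                       # ... summed over the batch
        slow[u] += np.outer(delta[u, :, b], a_d[u, :, b])
assert np.allclose(fast, slow)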
def convpool(X, convFilters, bias, kernel, stride):
    assert (kernel == 2), 'Only Size 2 Kernel Supported Currently'
    assert (stride == 2), 'Only Size 2 Stride Supported Currently'
    featureMaps = []
    for i in range(len(convFilters)):
        featureMap = []
        convFilter = convFilters[i]
        depth = len(convFilter)
        assert (depth == len(X)), 'Dimension Mismatch'
        # convolve every input channel with the matching filter slice; rotating the
        # kernel by 180 degrees turns convolve2d into cross-correlation
        for j in range(depth):
            featureMap.append(
                signal.convolve2d(X[j], np.rot90(convFilter[j], 2), 'valid'))
        # sum over channels, add the bias and apply the non-linearity
        # ('activation' and 'pool' are module-level configuration values)
        featureMap = act.activation(
            sum(featureMap) + bias[i] * np.ones(
                (featureMap[0].shape[0], featureMap[0].shape[1])), activation)
        # 2x2 pooling: split the map into non-overlapping 2x2 blocks
        # (integer division keeps the reshape dimensions integral)
        pre = featureMap.reshape(featureMap.shape[0] // 2, 2,
                                 featureMap.shape[1] // 2, 2)
        if pool == 'max':
            featureMaps.append(pre.max(axis=(1, 3)))
        elif pool == 'mean':
            featureMaps.append(pre.mean(axis=(1, 3)))
        else:
            raise ValueError('Invalid pool option')
    return np.asarray(featureMaps)
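# A minimal sketch of the reshape-based 2x2 pooling trick used in convpool:
# splitting an (H, W) map into (H//2, 2, W//2, 2) blocks and reducing over
# axes (1, 3) is equivalent to sliding a non-overlapping 2x2 window over the map.
import numpy as np

fm = np.arange(16, dtype=float).reshape(4, 4)
blocks = fm.reshape(fm.shape[0] // 2, 2, fm.shape[1] // 2, 2)
print(blocks.max(axis=(1, 3)))    # [[ 5.  7.] [13. 15.]]      -> 2x2 max pooling
print(blocks.mean(axis=(1, 3)))   # [[ 2.5  4.5] [10.5 12.5]]  -> 2x2 mean pooling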
def predict(self, x, is_training=False):
    '''
    Calculates the output of the model for the input x.
    If is_training is set to True, returns a python list of the activations of
    all layers (needed for the backward pass); otherwise returns only the
    activations of the last layer.
    '''
    # Forward propagation: non_lin(matrix multiplication + biases)
    layer_activations = [x]
    for layer in range(0, len(self.layer_sizes) - 1):
        # use activations from the previous layer and the non-linearity of the current layer
        curr_layer_activation = activation(
            np.dot(layer_activations[layer], self.weight_matrices[layer]) +
            self.biases[layer], self.non_lins[layer + 1])
        if self.layer_type[layer + 1] == 'dropout' and is_training:
            # do (inverted) dropout only during training
            mask = np.random.binomial(
                [np.ones((1, curr_layer_activation.shape[1]))],
                self.dropout_keep_prob[layer + 1])[0]
            mask = np.asfarray(mask)
            # scale up the survivors so the expected activation stays unchanged
            mask *= 1.0 / (self.dropout_keep_prob[layer + 1])
            curr_layer_activation = curr_layer_activation * mask
        layer_activations.append(curr_layer_activation)
    if is_training:
        return layer_activations
    else:
        return layer_activations[-1]
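# A minimal sketch of the inverted-dropout scaling used in predict: multiplying the
# surviving units by 1/keep_prob keeps the expected activation unchanged, so no
# rescaling is needed at test time.  The names and values below are illustrative only.
import numpy as np

keep_prob = 0.8
acts = np.ones((1, 100000))                                       # dummy activations
mask = np.random.binomial(1, keep_prob, size=acts.shape) / keep_prob
dropped = acts * mask
print(acts.mean(), dropped.mean())   # both close to 1.0 in expectation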
def forward(self, inputData):
    weights = self.Weights
    biases = self.Biases
    poolParams = self.poolParams
    # 'activation' is a module-level configuration value
    # layer0 = input layer
    layer0 = np.asarray(inputData)
    # layer1 = conv1 layer
    layer1 = convFwd(np.asarray([layer0]), weights[0], biases[0])
    # layer2 = pool1 layer
    layer2 = poolFwd(layer1, poolParams[0][0], poolParams[0][1])
    # layer2 = convpool(np.asarray([layer0]), weights[0], biases[0], poolParams[0][0], poolParams[0][1])
    # layer3 = conv2 layer
    layer3 = convFwd(layer2, weights[1], biases[1])
    # layer4 = pool2 layer
    layer4 = poolFwd(layer3, poolParams[1][0], poolParams[1][1])
    # layer4 = convpool(layer2, weights[1], biases[1], poolParams[1][0], poolParams[1][1])
    # layer5 = fc1 layer
    layer5 = convFwd(layer4, weights[2], biases[2])
    # layer6 = fc2 layer
    layer6 = act.activation(np.dot(weights[3], layer5[:, 0]).transpose() + biases[3],
                            activation).transpose()
    # layer7 = softmax layer (shift by the max for numerical stability)
    layer7 = np.dot(weights[4], layer6[:, 0]).transpose() + biases[4]
    layer7 -= np.max(layer7)
    layer7 = np.exp(layer7) / np.sum(np.exp(layer7))
    return layer7
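# A quick check of the max-shift used before the softmax in forward: subtracting a
# constant from the logits does not change the softmax output, it only keeps np.exp
# from overflowing on large values.
import numpy as np

logits = np.array([1000.0, 1001.0, 1002.0])            # would overflow naively
shifted = logits - np.max(logits)
probs = np.exp(shifted) / np.sum(np.exp(shifted))
small = np.array([1.0, 2.0, 3.0])                      # same pairwise differences
ref = np.exp(small) / np.sum(np.exp(small))
assert np.allclose(probs, ref)
print(probs)                                           # [0.09003057 0.24472847 0.66524096]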
def forward(self, X):
    try:
        batch_size = X.shape[1]
    except IndexError:
        batch_size = 1
        X = cp.reshape(X, (X.shape[0], batch_size))
    self.prev_layer = X
    # 'd' sub-layer: affine transform of the previous layer, then its activation
    self.zeta['d'] = cp.dot(self.W['d'], self.prev_layer) + self.b['d']
    self.a['d'] = activation(func=self.den_activation, x=self.zeta['d'])
    # 's' sub-layer: per-unit matrix product over the 'd' activations, then its activation
    self.zeta['s'] = cp.einsum('nik,nkj->nij', self.W['s'], self.a['d']) + self.b['s']
    self.a['s'] = activation(func=self.activation, x=self.zeta['s']).squeeze()
    return self.a['s']
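# The einsum 'nik,nkj->nij' in forward above is a batched matrix product: for every
# unit n it multiplies W['s'][n] (i x k) with a['d'][n] (k x j).  In numpy/cupy this
# is exactly what matmul does on stacked matrices; the shapes below are illustrative.
import numpy as np

Ws = np.random.randn(4, 2, 3)
Ad = np.random.randn(4, 3, 1)
assert np.allclose(np.einsum('nik,nkj->nij', Ws, Ad), np.matmul(Ws, Ad))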
def forward_propagation(self, X):
    try:
        batch_size = X.shape[1]
    except IndexError:
        batch_size = 1
        X = cp.reshape(X, (X.shape[0], batch_size))
    for i in range(len(self.neurons_per_layer)):
        if i == 0:
            # input layer: activations are the inputs themselves
            self.layers[i] = X
            self.zeta[i] = X
        elif i == len(self.neurons_per_layer) - 1:
            # output layer: affine transform followed by softmax
            self.zeta[i] = cp.dot(self.weights[i - 1], self.layers[i - 1]) + self.biases[i - 1]
            self.layers[i] = activation(func='softmax', x=self.zeta[i])
        else:
            # hidden layers: affine transform followed by the configured non-linearity
            self.zeta[i] = cp.dot(self.weights[i - 1], self.layers[i - 1]) + self.biases[i - 1]
            self.layers[i] = activation(func=self.activation_functions[i - 1], x=self.zeta[i])
def convFwd(X, convFilters, bias):
    featureMaps = []
    for i in range(len(convFilters)):
        featureMap = []
        convFilter = convFilters[i]
        depth = len(convFilter)
        assert (depth == len(X)), 'Dimension Mismatch'
        # convolve every input channel with the matching filter slice; rotating the
        # kernel by 180 degrees turns convolve2d into cross-correlation
        for j in range(depth):
            featureMap.append(
                signal.convolve2d(X[j], np.rot90(convFilter[j], 2), 'valid'))
        # sum over channels, add the bias and apply the non-linearity
        # ('activation' is a module-level configuration value)
        featureMap = sum(featureMap) + bias[i] * np.ones(
            (featureMap[0].shape[0], featureMap[0].shape[1]))
        featureMaps.append(act.activation(featureMap, activation))
    return np.asarray(featureMaps)
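# A minimal check of the kernel-flipping trick in convFwd: convolving with a kernel
# rotated by 180 degrees is the same as cross-correlating with the original kernel,
# which is the operation usually meant by a "conv" layer.
import numpy as np
from scipy import signal

X = np.random.randn(6, 6)
K = np.random.randn(3, 3)
flipped_conv = signal.convolve2d(X, np.rot90(K, 2), 'valid')
cross_corr = signal.correlate2d(X, K, 'valid')
assert np.allclose(flipped_conv, cross_corr)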
def backward_pass(self, layer_activations, targets):
    '''
    Returns the deltas for each layer of the network; deltas are as defined
    in Michael Nielsen's book.
    '''
    # Backward propagation: calculate the errors and gradients
    deltas = [None] * (len(self.layer_sizes))
    # we assume the loss is always cross-entropy and the last layer is a softmax layer
    deltas[-1] = losses.cross_entropy_loss(layer_activations[-1], targets, deriv=True)
    # start the iteration from the second-to-last layer
    for layer in range(len(deltas) - 2, 0, -1):
        deltas[layer] = np.dot(deltas[layer + 1],
                               self.weight_matrices[layer].T) * activation(
                                   layer_activations[layer],
                                   type=self.non_lins[layer],
                                   deriv=True)
    return deltas
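# backward_pass evaluates the activation derivative on the *activations* rather than
# on the pre-activations; this works because, for the usual non-linearities, the
# derivative can be written in terms of the output, e.g. for the sigmoid
# sigma'(z) = a * (1 - a).  A quick numerical check of that identity:
import numpy as np

z = np.linspace(-4, 4, 9)
a = 1.0 / (1.0 + np.exp(-z))                              # sigmoid activations
eps = 1e-6
numeric = (1.0 / (1.0 + np.exp(-(z + eps))) - a) / eps    # finite difference
assert np.allclose(numeric, a * (1 - a), atol=1e-5)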
def backward_propagation(self, y):
    last_layer = len(list(self.layers.keys())) - 1
    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
        y = cp.reshape(y, (y.shape[0], batch_size))
    # walk backwards from the output layer down to the first hidden layer
    for i in range(last_layer, 0, -1):
        if i == list(self.layers.keys())[-1]:
            # output layer: softmax + cross-entropy gives delta = a - y
            self.deltas[i] = cp.subtract(self.layers[i], y)
            self.dB[i - 1] = (1 / batch_size) * cp.sum(self.deltas[i], axis=1)
            self.dB[i - 1] = cp.reshape(self.dB[i - 1], (self.dB[i - 1].shape[0], 1))
            self.dW[i - 1] = (1 / batch_size) * cp.dot(self.deltas[i], self.layers[i - 1].T)
        else:
            # hidden layers: backpropagate the delta of the following layer
            self.deltas[i] = cp.multiply(
                cp.matmul(self.weights[i].T, self.deltas[i + 1]),
                activation(str(self.activation_functions[i - 1]) + '_der', self.zeta[i]))
            self.dB[i - 1] = (1 / batch_size) * cp.sum(self.deltas[i], axis=1)
            self.dB[i - 1] = cp.reshape(self.dB[i - 1], (self.dB[i - 1].shape[0], 1))
            self.dW[i - 1] = (1 / batch_size) * cp.dot(self.deltas[i], self.layers[i - 1].T)
def backward(self, trainData, trainLabel):
    assert (len(trainData) == len(trainLabel)), 'trainData and trainLabel must have the same length'
    batchSize = len(trainData)
    weights = self.Weights
    biases = self.Biases
    DirW = self.DirW          # momentum buffers for the weights
    DirB = self.DirB          # momentum buffers for the biases
    poolParams = self.poolParams
    # gradient accumulators, one per parameter tensor
    dW4 = np.zeros(weights[4].shape)
    dB4 = np.zeros(biases[4].shape)
    dW3 = np.zeros(weights[3].shape)
    dB3 = np.zeros(biases[3].shape)
    dW2 = np.zeros(weights[2].shape)
    dB2 = np.zeros(biases[2].shape)
    dW1 = np.zeros(weights[1].shape)
    dB1 = np.zeros(biases[1].shape)
    dW0 = np.zeros(weights[0].shape)
    dB0 = np.zeros(biases[0].shape)
    loss = 0
    for image in range(batchSize):
        X_data = trainData[image]
        X_label = trainLabel[image]

        ### Forward pass
        # layer0 = input layer
        layer0 = np.asarray(X_data)
        # layer1 = conv1 layer
        layer1 = convFwd(np.asarray([layer0]), weights[0], biases[0])
        # layer2 = pool1 layer
        layer2 = poolFwd(layer1, poolParams[0][0], poolParams[0][1])
        # layer3 = conv2 layer
        layer3 = convFwd(layer2, weights[1], biases[1])
        # layer4 = pool2 layer
        layer4 = poolFwd(layer3, poolParams[1][0], poolParams[1][1])
        # layer5 = fc1 layer
        layer5 = convFwd(layer4, weights[2], biases[2])
        # layer6 = fc2 layer
        layer6 = act.activation(np.dot(weights[3], layer5[:, 0]).transpose() + biases[3],
                                activation).transpose()
        # layer7 = softmax layer (shift by the max for numerical stability)
        layer7 = np.dot(weights[4], layer6[:, 0]).transpose() + biases[4]
        layer7 -= np.max(layer7)
        layer7 = np.exp(layer7) / np.sum(np.exp(layer7))
        # cross-entropy loss for this sample
        loss += -1 * np.sum(X_label * np.log(layer7))

        ### Accumulate gradients
        dy = -1 * (X_label - layer7) / 2
        [dy, dW, dB] = fcback(layer6, np.asarray([dy]).transpose(), weights[4])
        dW4 += dW
        dB4 += dB.flatten()
        dy = act.backActivate(dy.transpose(), layer6, activation)
        [dy, dW, dB] = fcback(layer5[:, 0], dy, weights[3])
        dW3 += dW
        dB3 += dB.flatten()
        dy = act.backActivate(dy.transpose(), layer5[:, 0], activation)
        [dy, dW, dB] = convBack(layer4, dy, weights[2])
        dW2 += dW
        dB2 += dB.flatten()
        dy = poolback(layer3, dy)
        dy = act.backActivate(dy, layer3, activation)
        [dy, dW, dB] = convBack(layer2, dy, weights[1])
        dW1 += dW
        dB1 += dB.flatten()
        dy = poolback(layer1, dy)
        dy = act.backActivate(dy, layer1, activation)
        [dy, dW, dB] = convBack(np.asarray([layer0]), dy, weights[0])
        dW0 += dW
        dB0 += dB.flatten()

    # Updates: classical momentum ('alpha' is the momentum coefficient and 'lr' the
    # learning rate; both are module-level configuration values)
    DirW[0] = alpha * DirW[0] - lr * dW0 / batchSize
    weights[0] += DirW[0]
    DirW[1] = alpha * DirW[1] - lr * dW1 / batchSize
    weights[1] += DirW[1]
    DirW[2] = alpha * DirW[2] - lr * dW2 / batchSize
    weights[2] += DirW[2]
    DirW[3] = alpha * DirW[3] - lr * dW3 / batchSize
    weights[3] += DirW[3]
    DirW[4] = alpha * DirW[4] - lr * dW4 / batchSize
    weights[4] += DirW[4]
    DirB[0] = alpha * DirB[0] - lr * dB0 / batchSize
    biases[0] += DirB[0]
    DirB[1] = alpha * DirB[1] - lr * dB1 / batchSize
    biases[1] += DirB[1]
    DirB[2] = alpha * DirB[2] - lr * dB2 / batchSize
    biases[2] += DirB[2]
    DirB[3] = alpha * DirB[3] - lr * dB3 / batchSize
    biases[3] += DirB[3]
    DirB[4] = alpha * DirB[4] - lr * dB4 / batchSize
    biases[4] += DirB[4]
    self.Weights = weights
    self.Biases = biases
    return loss / batchSize
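# The update at the end of backward is classical momentum:
#     Dir <- alpha * Dir - lr * grad / batchSize
#     W   <- W + Dir
# A minimal 1-D sketch of the behaviour (alpha and lr below are illustrative values,
# not the module's actual hyper-parameters):
import numpy as np

alpha, lr = 0.9, 0.1
w, direction = 5.0, 0.0
for step in range(200):
    grad = 2 * w                         # gradient of f(w) = w**2
    direction = alpha * direction - lr * grad
    w += direction
print(w)                                 # decays toward the minimum at 0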