import numpy as np


def forward_prop(self, x):
    # Forward pass of a two-layer network: input -> hidden (w1) -> output (w2).
    # Expects samples as rows: x has shape (n_samples, n_inputs).
    if len(x.shape) != 2 or x.shape[1] != self.n_inputs:
        raise ValueError('Incorrect input shape ' + str(x.shape) + ' given!')
    # Prepend the bias input (-1) to every sample.
    x = np.append(-np.ones((len(x), 1)), x, axis=1)
    self.a1 = x
    self.ins2 = np.matmul(self.a1, self.w1)
    self.a2 = sig(self.ins2)
    # Prepend the bias input (-1) to the hidden activations.
    self.a2 = np.append(-np.ones((len(self.a2), 1)), self.a2, axis=1)
    self.ins3 = np.matmul(self.a2, self.w2)
    self.a3 = sig(self.ins3)
    return self.a3
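# Minimal smoke test for forward_prop, assuming it is a method of a class whose
# instances expose n_inputs, w1 of shape (n_inputs + 1, n_hidden) and w2 of
# shape (n_hidden + 1, n_outputs), as the matmul calls above imply. Unlike
# Accuracy/train_network below, this method takes samples as rows. The sizes
# are illustrative, SimpleNamespace stands in for a real instance, and the
# sig helper sketched further down (or any equivalent sigmoid) must exist.
from types import SimpleNamespace


def _forward_prop_demo():
    rng = np.random.default_rng(0)
    net = SimpleNamespace(n_inputs=4,
                          w1=rng.standard_normal((5, 3)),   # 4 inputs + bias -> 3 hidden
                          w2=rng.standard_normal((4, 2)))   # 3 hidden + bias -> 2 outputs
    x = rng.standard_normal((10, 4))                        # 10 samples, 4 features
    out = forward_prop(net, x)
    assert out.shape == (10, 2)                             # one output row per sample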
def Accuracy(self, X_test, Y_test):
    # Classification accuracy (in percent) on a test set laid out with one
    # sample per column of X_test and one one-hot row per sample in Y_test.
    # Number of instances
    N = Y_test.shape[0]
    # Prepend the bias row of ones
    X_test = np.vstack((np.ones((1, N)), X_test))
    # Z holds the output of the current layer
    Z = X_test
    for k in range(self.n_layers):
        # Weights for the kth layer
        w = self.weights[k]
        zk = w.dot(Z)
        # Sigmoid activation (applied to every layer, including the last)
        zk = sig(zk)
        if k != self.n_layers - 1:
            # Prepend the bias row for the next layer
            zk = np.vstack((np.ones((1, N)), zk))
        Z = zk
    Z = Z.T
    count = 0
    for i in range(Z.shape[0]):
        if np.argmax(Y_test[i]) == np.argmax(Z[i]):
            count += 1
    return count * 100 / N
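# A matching smoke test for Accuracy, again with SimpleNamespace standing in for
# an instance (assumed attributes: weights, a list of (fan_out, fan_in + 1)
# matrices, and n_layers = len(weights)). Sizes and data are illustrative only.
def _accuracy_demo():
    rng = np.random.default_rng(0)
    net = SimpleNamespace(weights=[rng.standard_normal((3, 5)),    # 4 inputs + bias -> 3 hidden
                                   rng.standard_normal((2, 4))],   # 3 hidden + bias -> 2 outputs
                          n_layers=2)
    X_test = rng.standard_normal((4, 10))        # 4 features, one column per sample
    Y_test = np.eye(2)[rng.integers(0, 2, 10)]   # one-hot rows, shape (10, 2)
    acc = Accuracy(net, X_test, Y_test)
    assert 0 <= acc <= 100                       # percentage of correct argmax predictions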
def dsig(x):
    # Derivative of the sigmoid, evaluated from the pre-activation value x.
    return sig(x) * (1 - sig(x))
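# sig is called throughout but never defined in this snippet. If it is not
# provided elsewhere in the project, a minimal sketch of the usual choice
# (the element-wise logistic function) is:
def sig(x):
    # 1 / (1 + e^(-x)), applied element-wise
    return 1.0 / (1.0 + np.exp(-x))


# Quick numerical sanity check for dsig against a central finite difference.
def _check_dsig(x=0.3, h=1e-6):
    approx = (sig(x + h) - sig(x - h)) / (2 * h)
    assert abs(approx - dsig(x)) < 1e-6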
def train_network(self, X_train, Y_train, X_test, Y_test, mini_batch_size,
                  epochs, learning_rate, dropout_rate=0):
    N = len(Y_train)
    # Optional data shuffling (currently disabled)
    # r = random.sample(range(0, N), N)
    # X_train = X_train[:, r]
    # Y_train = Y_train[r]
    # Accuracy history for plotting
    self.X_Graph = []
    self.Y_train_Graph = []
    self.Y_test_Graph = []
    # Run the epochs
    for i in range(epochs):
        # For each mini-batch
        for j in range(0, N, mini_batch_size):
            N_mini = min(j + mini_batch_size, N) - j
            X_mini = X_train[:, j:j + mini_batch_size]
            # Prepend the bias row of ones
            X_mini = np.vstack((np.ones((1, N_mini)), X_mini))
            Y_mini = Y_train[j:j + mini_batch_size]

            # Forward pass: Z[k] is the (bias-augmented) input to layer k
            Z = [X_mini]
            for k in range(self.n_layers):
                # Weights for the kth layer
                w = self.weights[k]
                if 0 < dropout_rate < 1:
                    # Drop whole units for this mini-batch; the bias row is not affected.
                    # Tiling by N_mini (not mini_batch_size) keeps the shapes consistent
                    # when the last mini-batch is smaller than mini_batch_size.
                    mask = np.random.binomial(1, 1 - dropout_rate, (Z[k].shape[0] - 1, 1))
                    Z[k][1:, :] *= np.tile(mask, N_mini)
                # Pre-activation of the next layer: weights[k] . Z[k]
                zk = w.dot(Z[k])
                # Sigmoid activation (applied to every layer, including the last)
                zk = sig(zk)
                if k != self.n_layers - 1:
                    # Prepend the bias row for the next layer
                    zk = np.vstack((np.ones((1, N_mini)), zk))
                Z.append(zk)

            # Backward pass
            delta_weights = []
            for k in range(self.n_layers):
                delta_weights.append(
                    np.zeros((self.architecture[k] + 1, self.architecture[k + 1])).T)
            # Output-layer delta: (prediction - target) times the sigmoid derivative
            delta_z = (Z[self.n_layers] - Y_mini.T) * (Z[self.n_layers] *
                                                       (1 - Z[self.n_layers]))
            # Gradients are summed over the mini-batch (not averaged by N_mini)
            delta_weights[self.n_layers - 1] = delta_z.dot(Z[self.n_layers - 1].T)
            for k in range(self.n_layers - 2, -1, -1):
                # Back-propagate through the sigmoid and the next layer's weights
                delta_z = Z[k + 1] * (1 - Z[k + 1]) * self.weights[k + 1].T.dot(delta_z)
                # Drop the bias row, which receives no gradient
                delta_z = delta_z[1:, :]
                delta_weights[k] = delta_z.dot(Z[k].T)

            # Gradient-descent update
            for k in range(self.n_layers):
                # print(np.max(delta_weights[k]))  # debug: largest gradient entry per layer
                self.weights[k] = self.weights[k] - learning_rate * delta_weights[k]

        training_Accuracy = self.Accuracy(X_train, Y_train)
        testing_Accuracy = self.Accuracy(X_test, Y_test)
        print(i + 1, "\tTraining " + str(training_Accuracy) +
              "\tTesting " + str(testing_Accuracy))
        self.X_Graph.append(i + 1)
        self.Y_train_Graph.append(training_Accuracy)
        self.Y_test_Graph.append(testing_Accuracy)
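# train_network and Accuracy imply the following layout, reconstructed from the
# indexing above (the init_weights helper below is hypothetical, shown only to
# make the expected shapes concrete; it is not part of the original code):
#   X_train, X_test : shape (n_features, N)   -- one column per sample
#   Y_train, Y_test : shape (N, n_classes)    -- one-hot rows
#   self.architecture = [n_features, ..., n_classes]
#   self.n_layers     = len(self.architecture) - 1
#   self.weights[k]   : shape (architecture[k + 1], architecture[k] + 1)
def init_weights(architecture, scale=0.1, seed=None):
    # One small random matrix per layer, with an extra column for the bias unit
    # that train_network/Accuracy prepend to each layer's activations.
    rng = np.random.default_rng(seed)
    return [scale * rng.standard_normal((architecture[k + 1], architecture[k] + 1))
            for k in range(len(architecture) - 1)]


# Hypothetical call, assuming a class (not shown here) that carries these
# methods and attributes:
#   net.train_network(X_train, Y_train, X_test, Y_test,
#                     mini_batch_size=64, epochs=10, learning_rate=0.01,
#                     dropout_rate=0.2)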
def sigmoid(x, derivative=False):
    # With derivative=True, x is assumed to already be a sigmoid output,
    # so the derivative is simply x * (1 - x); otherwise return sig(x).
    if derivative:
        return x * (1 - x)
    return sig(x)
def sigmoid(arr):
    # Thin element-wise wrapper around sig. Note that if this definition sits in
    # the same module as the two-argument sigmoid above, it shadows that version.
    return sig(arr)