def backprop(self, x, y):
    """Return a tuple ``(nabla_b, nabla_w)`` with the gradients of the cost
    with respect to every bias and weight in the network."""
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    # feedforward
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []            # list to store all the z vectors, layer by layer
    for b, w in zip(self.biases, self.weights):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # backward pass
    delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, self.num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
    return (nabla_b, nabla_w)
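# The method above assumes a few helpers defined elsewhere in the repository.
# A minimal sketch of what they might look like is given below; the exact
# implementations in the repo may differ, and `cost_derivative` is a method
# on the network class, written here as a free function only for brevity.

import numpy as np

def sigmoid(z):
    """Elementwise logistic function."""
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid, evaluated at the pre-activation z."""
    return sigmoid(z) * (1.0 - sigmoid(z))

def cost_derivative(output_activations, y):
    """Gradient of a quadratic cost with respect to the output activations."""
    return output_activations - y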
def train(train_x, train_y, learning_rate=0.2):
    # Flatten input (batch_size, 28, 28) -> (batch_size, 784)
    x = train_x.reshape(train_x.shape[0], -1)
    # Turn labels into their one-hot representations
    y = one_hot_encoder(train_y)
    # Initialize weights
    w1, b1 = initialize_weight((784, 256), bias=True)
    w2, b2 = initialize_weight((256, 10), bias=True)
    num_epochs = 50
    loss_history = []
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}\n===============".format(epoch, num_epochs))
        # Forward prop
        h1 = np.dot(x, w1) + b1
        a1 = sigmoid(h1)
        h2 = np.dot(a1, w2) + b2
        a2 = softmax(h2)
        out = a2
        # Cross-entropy loss
        loss = cross_entropy_loss(out, train_y)
        loss_history.append(loss)
        print("Loss: {:.6f}".format(loss))
        # Compute and print accuracy
        pred = np.argmax(out, axis=1)
        pred = pred.reshape(pred.shape[0], 1)
        acc = np.mean(pred == train_y)
        print("Accuracy: {:.2f}%\n".format(acc * 100))
        # Backward prop
        m = out.shape[0]
        dh2 = a2 - y
        dw2 = (1 / m) * np.dot(a1.T, dh2)
        db2 = (1 / m) * np.sum(dh2, axis=0, keepdims=True)
        dh1 = np.dot(dh2, w2.T) * sigmoid_prime(a1)
        dw1 = (1 / m) * np.dot(x.T, dh1)
        db1 = (1 / m) * np.sum(dh1, axis=0, keepdims=True)
        # Weight (and bias) update
        w1 -= learning_rate * dw1
        b1 -= learning_rate * db1
        w2 -= learning_rate * dw2
        b2 -= learning_rate * db2
    return w1, b1, w2, b2, loss_history
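# train() relies on several helpers that are not shown in this listing.
# Below is a hedged sketch of the assumed interfaces, inferred from how they
# are called above; the repository's own versions may differ in detail
# (e.g. weight initialization scale, numerical-stability constants).

import numpy as np

def one_hot_encoder(labels, num_classes=10):
    """Turn integer labels of shape (N, 1) into one-hot vectors of shape (N, num_classes)."""
    labels = labels.reshape(-1)
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot

def softmax(z):
    """Row-wise softmax with max-subtraction for numerical stability."""
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

def cross_entropy_loss(probs, labels):
    """Mean negative log-likelihood of the true class; labels are integer indices of shape (N, 1)."""
    n = probs.shape[0]
    return -np.mean(np.log(probs[np.arange(n), labels.reshape(-1)] + 1e-12))

def initialize_weight(shape, bias=False):
    """Small random weights plus (optionally) a zero bias row."""
    w = np.random.randn(*shape) * 0.01
    if bias:
        return w, np.zeros((1, shape[1]))
    return w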
def test_sigmoid():
    z = np.arange(-10, 10, 0.1)
    y = act.sigmoid(z)
    y_p = act.sigmoid_prime(z)
    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(z, y)
    plt.title('sigmoid')
    plt.subplot(1, 2, 2)
    plt.plot(z, y_p)
    plt.title('derivative sigmoid')
    plt.show()
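# Besides eyeballing the plots, the derivative can be checked numerically.
# The optional sanity check below compares act.sigmoid_prime against a central
# finite-difference approximation; it assumes the same `act` module used in
# test_sigmoid and that sigmoid_prime takes the pre-activation z. The step
# size and tolerance are arbitrary choices.

import numpy as np

def test_sigmoid_prime_numerically(eps=1e-5, tol=1e-6):
    z = np.arange(-10, 10, 0.1)
    analytic = act.sigmoid_prime(z)
    # Central finite-difference approximation of d(sigmoid)/dz
    numeric = (act.sigmoid(z + eps) - act.sigmoid(z - eps)) / (2 * eps)
    assert np.max(np.abs(analytic - numeric)) < tol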
def backprop(x, y, biases, weights, cost, num_layers):
    """Backpropagation for a fully connected network.

    Return a tuple ``(nabla_b, nabla_w)`` representing the gradient of the
    cost with respect to all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): lists of biases and weights of the entire network
        cost (CrossEntropyCost): object used to compute the cost and its delta
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradients for all biases and
        weights; nabla_b and nabla_w have the same shapes as the input biases
        and weights.
    """
    # Initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # Feedforward pass: store the activations of every layer, starting with
    # the input itself
    h = [x]
    for i in range(num_layers - 1):
        a = sigmoid(np.dot(weights[i], h[i]) + biases[i])
        h.append(a)

    # Gradient of the error with respect to the output
    # (h[-1] is the activation of the output layer)
    delta = cost.delta(h[-1], y)

    # Backward pass: propagate the error layer by layer and accumulate the
    # gradient of each bias and weight
    nabla_b[-1] = delta * sigmoid_prime(h[-1])
    nabla_w[-1] = np.dot(nabla_b[-1], h[-2].transpose())
    for i in range(num_layers - 3, -1, -1):
        nabla_b[i] = np.dot(weights[i + 1].transpose(), nabla_b[i + 1]) * sigmoid_prime(h[i + 1])
        nabla_w[i] = np.dot(nabla_b[i], h[i].transpose())

    return (nabla_b, nabla_w)
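# backprop() only computes gradients; applying them is left to the caller.
# A hypothetical mini-batch SGD step that consumes its output might look like
# the sketch below. The name `update_mini_batch` and the (x, y) pair format of
# `mini_batch` are assumptions, not part of the original code.

import numpy as np

def update_mini_batch(mini_batch, biases, weights, cost, num_layers, eta):
    """Average the per-example gradients from backprop() and take one SGD step."""
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]
    for x, y in mini_batch:
        delta_nabla_b, delta_nabla_w = backprop(x, y, biases, weights, cost, num_layers)
        nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
    m = len(mini_batch)
    weights = [w - (eta / m) * nw for w, nw in zip(weights, nabla_w)]
    biases = [b - (eta / m) * nb for b, nb in zip(biases, nabla_b)]
    return biases, weights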
def train(train_x, train_y, learning_rate=0.1, num_epochs=50, batch_size=1):
    # Flatten input (num_samples, 28, 28) -> (num_samples, 784)
    x = train_x.reshape(train_x.shape[0], -1)
    num_samples = x.shape[0]
    # Turn labels into their one-hot representations
    y = one_hot_encoder(train_y)
    # Make a data loader
    trainloader = dataloader(x, y, batch_size=batch_size, shuffle=True)
    # Initialize weights
    w1, b1 = initialize_weight((784, 256), bias=True)
    w2, b2 = initialize_weight((256, 10), bias=True)
    loss_history = []
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}\n===============".format(epoch, num_epochs))
        batch_loss = 0
        acc = 0
        for inputs, labels in trainloader:
            # Number of samples per batch
            m = inputs.shape[0]
            # Forward prop
            h1 = np.dot(inputs, w1) + b1
            a1 = sigmoid(h1)
            h2 = np.dot(a1, w2) + b2
            a2 = softmax(h2)
            out = a2
            # Cross-entropy loss (accumulated over the epoch)
            batch_loss += cross_entropy_loss(
                out, labels.argmax(axis=1).reshape(m, 1))
            # Accuracy (count of correct predictions)
            pred = np.argmax(out, axis=1)
            pred = pred.reshape(pred.shape[0], 1)
            acc += np.sum(pred == labels.argmax(axis=1).reshape(m, 1))
            # Backward prop
            dh2 = a2 - labels
            dw2 = (1 / m) * np.dot(a1.T, dh2)
            db2 = (1 / m) * np.sum(dh2, axis=0, keepdims=True)
            dh1 = np.dot(dh2, w2.T) * sigmoid_prime(a1)
            dw1 = (1 / m) * np.dot(inputs.T, dh1)
            db1 = (1 / m) * np.sum(dh1, axis=0, keepdims=True)
            # Weight (and bias) update
            w1 -= learning_rate * dw1
            b1 -= learning_rate * db1
            w2 -= learning_rate * dw2
            b2 -= learning_rate * db2
        loss_history.append(batch_loss / num_samples)
        print("Loss: {:.6f}".format(batch_loss / num_samples))
        print("Accuracy: {:.2f}%\n".format(acc / num_samples * 100))
    return w1, b1, w2, b2, loss_history
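# The `dataloader` used above is assumed to shuffle the data and yield
# (inputs, labels) mini-batches. A minimal generator consistent with how it
# is called is sketched below; the repository's real implementation may
# differ (for instance, it is likely a class that can be iterated once per
# epoch and reshuffles each time, rather than a one-shot generator).

import numpy as np

def dataloader(x, y, batch_size=1, shuffle=True):
    """Yield (inputs, labels) batches of size `batch_size` from x and y."""
    num_samples = x.shape[0]
    indices = np.arange(num_samples)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, num_samples, batch_size):
        batch_idx = indices[start:start + batch_size]
        yield x[batch_idx], y[batch_idx]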