def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations a_k and activations h_k of
    # every layer
    h_k = x
    h_ks = [h_k]
    a_ks = []
    for b, wT in zip(biases, weightsT):
        a_k = np.dot(wT, h_k) + b
        a_ks.append(a_k)
        h_k = sigmoid(a_k)
        h_ks.append(h_k)

    # gradient of the error with respect to the output; h_ks[-1] holds the
    # activations of the output layer, and df_wrt_a (the raw dC/da) still
    # needs the sigmoid derivative
    delta = (cost).df_wrt_a(h_ks[-1], y) * sigmoid_prime(a_ks[-1])
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, h_ks[-2].T)

    # backward pass: propagate delta through each layer and read off the
    # gradient for each weight and bias
    for i in range(2, num_layers):
        delta = np.dot(weightsT[-i + 1].T, delta) * sigmoid_prime(a_ks[-i])
        nabla_b[-i] = delta
        nabla_wT[-i] = np.dot(delta, h_ks[-i - 1].T)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations z and activations of every layer
    activations = [x]
    z = []
    for i in range(1, num_layers):
        b = biases[i - 1]
        wT = weightsT[i - 1]
        z.append(np.dot(wT, activations[i - 1]) + b)
        activations.append(sigmoid(z[i - 1]))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(z[-1])

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, activations[-2].transpose())
    for i in range(2, num_layers):
        delta = np.dot(weightsT[-i + 1].transpose(), delta) * sigmoid_prime(z[-i])
        nabla_b[-i] = delta
        nabla_wT[-i] = np.dot(delta, activations[-i - 1].transpose())

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []  # list to store all the z vectors, layer by layer
    for b, wT in zip(biases, weightsT):
        z = np.dot(wT, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    # backward pass
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, activations[-2].transpose())

    # Note that the variable l in the loop below is used a little differently
    # to the notation in Chapter 2 of the book. Here, l = 1 means the last
    # layer of neurons, l = 2 is the second-to-last layer, and so on. It's a
    # renumbering of the scheme in the book, used here to take advantage of
    # the fact that Python can use negative indices in lists.
    for l in range(2, num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(weightsT[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_wT[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_wT)

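# A hedged usage sketch for the variant above: one plain single-example SGD
# update built on backprop. The function name, the in-place-style update, and
# the default learning rate are illustrative assumptions, not the course's
# actual training loop.
def sgd_step(x, y, biases, weightsT, cost, num_layers, eta=0.5):
    """Apply one gradient-descent update using the gradients from backprop."""
    nabla_b, nabla_wT = backprop(x, y, biases, weightsT, cost, num_layers)
    # step each parameter against its gradient, scaled by the learning rate eta
    biases = [b - eta * nb for b, nb in zip(biases, nabla_b)]
    weightsT = [wT - eta * nwT for wT, nwT in zip(weightsT, nabla_wT)]
    return biases, weightsT
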
def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations h and pre-activations a of every layer
    h = [x]
    a = []
    for k in range(1, num_layers):
        a.append(np.dot(weights[k - 1], h[k - 1]) + biases[k - 1])
        h.append(sigmoid(a[k - 1]))

    # gradient of the error with respect to the output; h[-1] holds the
    # activations of the output layer (for CrossEntropyCost, cost.delta
    # already folds in the sigmoid derivative)
    delta = (cost).delta(h[-1], y)

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, h[-2].transpose())
    for layer in range(2, num_layers):
        delta = np.dot(weights[-layer + 1].transpose(), delta) * sigmoid_prime(
            a[-layer])
        nabla_b[-layer] = delta
        nabla_w[-layer] = np.dot(delta, h[-layer - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations and pre-activations of every layer
    activations = [x]  # list to store the activation values
    zs = []  # list to store the pre-activation (weighted-input) vectors
    val = x
    for i in range(num_layers - 1):
        val = np.dot(weights[i], val) + biases[i]
        zs.append(val)
        val = sigmoid(val)
        activations.append(val)

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).delta(activations[-1], y)

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, num_layers):
        z = zs[-l]
        delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(z)
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: a[k] holds the pre-activation of layer k + 1;
    # activations[0] is the input x and activations[k + 1] = sigmoid(a[k])
    a = [np.zeros(b.shape) for b in biases]
    h_s = [np.zeros(b.shape) for b in biases]
    activations = [x] + h_s
    for k in range(num_layers - 1):
        a[k] = biases[k] + np.dot(weights[k], activations[k])
        activations[k + 1] = sigmoid(a[k])

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).delta(activations[-1], y)
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # backward pass: compute the gradient for each weight and bias
    for k in range(2, num_layers):
        delta = np.dot(weights[-k + 1].T, delta) * sigmoid_prime(a[-k])
        nabla_b[-k] = delta
        nabla_w[-k] = np.dot(delta, activations[-k - 1].T)

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: a[k] is the pre-activation of layer k (a[0] is just a
    # placeholder for the input), activations[k] is its activation
    activations = [x]
    a = [x]
    for i in range(0, num_layers - 1):
        a.append(biases[i] + np.dot(weightsT[i], activations[i]))
        activations.append(sigmoid(a[-1]))

    # gradient of the error with respect to the output activations;
    # activations[-1] holds the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: at each layer, first convert the activation gradient G
    # into a pre-activation gradient (including the output layer, since
    # df_wrt_a alone is the raw dC/da), then read off the bias and weight
    # gradients and propagate G to the layer below
    G = delta
    for k in range(num_layers - 1, 0, -1):
        G = np.multiply(G, sigmoid_prime(a[k]))
        nabla_b[k - 1] = G
        nabla_wT[k - 1] = np.dot(G, np.transpose(activations[k - 1]))
        G = np.dot(np.transpose(weightsT[k - 1]), G)

    return (nabla_b, nabla_wT)

def test_sigmoid():
    z = np.arange(-10, 10, 0.1)
    y = act.sigmoid(z)
    y_p = act.sigmoid_prime(z)

    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(z, y)
    plt.title('sigmoid')
    plt.subplot(1, 2, 2)
    plt.plot(z, y_p)
    plt.title('derivative sigmoid')
    plt.show()

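# Every implementation above assumes sigmoid and sigmoid_prime helpers are in
# scope (test_sigmoid references them through a module imported as `act`). A
# minimal sketch using the standard logistic definitions; the course's actual
# activation module may differ in detail:
import numpy as np

def sigmoid(z):
    """Logistic function sigma(z) = 1 / (1 + exp(-z)), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the logistic: sigma'(z) = sigma(z) * (1 - sigma(z))."""
    s = sigmoid(z)
    return s * (1.0 - s)
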
def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # the gradient lists are built back to front by prepending below
    nabla_b = []
    nabla_w = []

    # feedforward: store the activations and pre-activation (z) vectors of
    # every layer
    activations = [x]  # store activations
    zs = []  # store the z vectors
    for i in range(num_layers - 1):
        zi = biases[i] + np.dot(weights[i], activations[i])
        zs.append(zi)
        activations.append(sigmoid(zi))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    g = (cost).delta(activations[-1], y)  # output-layer delta for the cross-entropy cost
    nabla_w = [np.dot(g, np.transpose(activations[-2]))]
    nabla_b = [g]

    # backward pass: prepend each layer's gradients so the lists stay in
    # front-to-back order
    for i in range(2, num_layers):
        g = np.dot(weights[-i + 1].transpose(), g) * sigmoid_prime(zs[-i])
        nabla_b = [g] + nabla_b
        nabla_w = [np.dot(g, np.transpose(activations[-i - 1]))] + nabla_w

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations (zs) and activations of every
    # layer, whole vectors at a time, so sigmoid_prime can be evaluated on
    # the pre-activations in the backward pass
    zs = []
    activations = [x]
    for b, wT in zip(biases, weightsT):
        zs.append(np.dot(wT, activations[-1]) + b)
        activations.append(sigmoid(zs[-1]))

    # gradient of the error with respect to the output activations
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: convert to a pre-activation gradient at each layer, read
    # off the bias and weight gradients, then propagate to the layer below
    for i in range(len(biases) - 1, -1, -1):
        delta = np.multiply(delta, sigmoid_prime(zs[i]))
        nabla_b[i] = delta
        nabla_wT[i] = np.dot(delta, np.transpose(activations[i]))
        delta = np.dot(np.transpose(weightsT[i]), delta)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations (zs) and activations of every layer
    activations = [x]
    zs = []
    for k in range(0, num_layers - 1):
        zs.append(np.dot(weightsT[k], activations[k]) + biases[k])
        activations.append(sigmoid(zs[k]))

    # gradient of the error with respect to the output, converted into a
    # pre-activation gradient (df_wrt_a alone is the raw dC/da)
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(zs[-1])

    # backward pass: the output layer uses delta as-is; every earlier layer
    # first propagates delta through the next layer's weights
    for i in range(num_layers - 2, -1, -1):
        if i != num_layers - 2:
            delta = np.dot(np.transpose(weightsT[i + 1]), delta) * sigmoid_prime(
                zs[i])
        nabla_b[i] = delta
        nabla_wT[i] = np.dot(delta, np.transpose(activations[i]))

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the gradients of
        all biases and weights; nabla_b and nabla_w have the same shapes as
        the input biases and weights.
    """
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations and pre-activations of every layer
    activation = x
    activations = [x]
    before_activations = []
    for bias, weight in zip(biases, weights):
        before_activation = np.dot(weight, activation) + bias
        before_activations.append(before_activation)
        activation = sigmoid(before_activation)
        activations.append(activation)

    # output-layer delta (for CrossEntropyCost, cost.delta already folds in
    # the sigmoid derivative)
    delta = (cost).delta(activations[-1], y)

    # backward pass: l = 1 is the output layer, l = 2 the second-to-last, ...
    for l in range(1, num_layers):
        if l != 1:
            delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(
                before_activations[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: pre_act[k] holds the pre-activations a^k (index 0 is a
    # placeholder) and activations[k] holds the activations h^k, with h^0 = x
    pre_act = [np.zeros(b.shape) for b in biases]
    pre_act.insert(0, np.nan)
    activations = [np.zeros(b.shape) for b in biases]
    activations.insert(0, x)
    for i in range(0, num_layers - 1):
        # use the previous layer's output as the current layer's input,
        # h^(k-1) -> a^k, then apply the sigmoid elementwise: g(a^k) -> h^k
        pre_act[i + 1] = np.matmul(weightsT[i], activations[i]) + biases[i]
        activations[i + 1] = sigmoid(pre_act[i + 1])

    # gradient of the error with respect to the output activations;
    # activations[-1] holds the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: handle the output layer first (df_wrt_a is the raw
    # dC/dh^k, so it still needs the sigmoid derivative) ...
    G = delta * sigmoid_prime(pre_act[-1])
    nabla_b[-1] = G
    nabla_wT[-1] = np.matmul(G, np.transpose(activations[-2]))
    G = np.matmul(np.transpose(weightsT[-1]), G)

    # ... then repeat the same update for the remaining layers
    for n in reversed(range(0, num_layers - 2)):
        G = np.multiply(G, sigmoid_prime(pre_act[n + 1]))
        nabla_b[n] = G
        nabla_wT[n] = np.matmul(G, np.transpose(activations[n]))
        G = np.matmul(np.transpose(weightsT[n]), G)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the pre-activations (zs) and activations of every
    # layer so the backward pass can evaluate sigmoid_prime on them directly
    zs = []
    activations = [x]
    for b, w in zip(biases, weights):
        zs.append(np.dot(w, activations[-1]) + b)
        activations.append(sigmoid(zs[-1]))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer (for CrossEntropyCost, cost.delta
    # already folds in the sigmoid derivative)
    delta = (cost).delta(activations[-1], y)

    # backward pass: read off the output-layer gradients, then walk back
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, num_layers):
        delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(
            zs[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

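# A minimal finite-difference check, useful for validating any of the backprop
# variants above against a numerical gradient. This is a sketch under two
# assumptions: the network is a plain `weights` (non-transposed) variant, and
# the cost object exposes an `fn(a, y)` method that evaluates the scalar cost;
# adjust both to match the actual course code.
def grad_check_bias(x, y, biases, weights, cost, num_layers, layer, idx,
                    eps=1e-5):
    """Compare one bias gradient from backprop against a central difference.

    `idx` indexes a single entry of biases[layer], e.g. (3, 0) for the
    column-vector bias shapes used above.
    """
    def total_cost(bs):
        # forward pass with the perturbed biases, then evaluate the scalar cost
        a = x
        for b, w in zip(bs, weights):
            a = sigmoid(np.dot(w, a) + b)
        return cost.fn(a, y)  # assumed cost-function interface

    analytic, _ = backprop(x, y, biases, weights, cost, num_layers)
    bs_plus = [b.copy() for b in biases]
    bs_minus = [b.copy() for b in biases]
    bs_plus[layer][idx] += eps
    bs_minus[layer][idx] -= eps
    numeric = (total_cost(bs_plus) - total_cost(bs_minus)) / (2 * eps)
    # the two returned values should agree to several decimal places
    return analytic[layer][idx], numeric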