def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations a_k and activations h_k of
    # every layer
    h_k = x
    h_ks = [h_k]
    a_ks = []
    for b, wT in zip(biases, weightsT):
        a_k = np.dot(wT, h_k) + b
        a_ks.append(a_k)
        h_k = sigmoid(a_k)
        h_ks.append(h_k)

    # gradient of the error with respect to the output; h_ks[-1] holds the
    # activations of the output layer, and df_wrt_a (the raw dC/da) still
    # needs the sigmoid derivative
    delta = (cost).df_wrt_a(h_ks[-1], y) * sigmoid_prime(a_ks[-1])
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, h_ks[-2].T)

    # backward pass: propagate delta through each layer and read off the
    # gradient for each weight and bias
    for i in range(2, num_layers):
        delta = np.dot(weightsT[-i + 1].T, delta) * sigmoid_prime(a_ks[-i])
        nabla_b[-i] = delta
        nabla_wT[-i] = np.dot(delta, h_ks[-i - 1].T)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations z and activations of every layer
    activations = [x]
    z = []
    for i in range(1, num_layers):
        b = biases[i - 1]
        wT = weightsT[i - 1]
        z.append(np.dot(wT, activations[i - 1]) + b)
        activations.append(sigmoid(z[i - 1]))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(z[-1])

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, activations[-2].transpose())
    for i in range(2, num_layers):
        delta = np.dot(weightsT[-i + 1].transpose(), delta) * sigmoid_prime(z[-i])
        nabla_b[-i] = delta
        nabla_wT[-i] = np.dot(delta, activations[-i - 1].transpose())

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []  # list to store all the z vectors, layer by layer
    for b, wT in zip(biases, weightsT):
        z = np.dot(wT, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    # backward pass
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, activations[-2].transpose())

    # Note that the variable l in the loop below is used a little differently
    # to the notation in Chapter 2 of the book. Here, l = 1 means the last
    # layer of neurons, l = 2 is the second-to-last layer, and so on. It's a
    # renumbering of the scheme in the book, used here to take advantage of
    # the fact that Python can use negative indices in lists.
    for l in range(2, num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(weightsT[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_wT[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_wT)

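# A hedged usage sketch for the variant above: one plain single-example SGD
# update built on backprop. The function name, the in-place-style update, and
# the default learning rate are illustrative assumptions, not the course's
# actual training loop.
def sgd_step(x, y, biases, weightsT, cost, num_layers, eta=0.5):
    """Apply one gradient-descent update using the gradients from backprop."""
    nabla_b, nabla_wT = backprop(x, y, biases, weightsT, cost, num_layers)
    # step each parameter against its gradient, scaled by the learning rate eta
    biases = [b - eta * nb for b, nb in zip(biases, nabla_b)]
    weightsT = [wT - eta * nwT for wT, nwT in zip(weightsT, nabla_wT)]
    return biases, weightsT
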
def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations h and pre-activations a of every layer
    h = [x]
    a = []
    for k in range(1, num_layers):
        a.append(np.dot(weights[k - 1], h[k - 1]) + biases[k - 1])
        h.append(sigmoid(a[k - 1]))

    # gradient of the error with respect to the output; h[-1] holds the
    # activations of the output layer (for CrossEntropyCost, cost.delta
    # already folds in the sigmoid derivative)
    delta = (cost).delta(h[-1], y)

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, h[-2].transpose())
    for layer in range(2, num_layers):
        delta = np.dot(weights[-layer + 1].transpose(), delta) * sigmoid_prime(
            a[-layer])
        nabla_b[-layer] = delta
        nabla_w[-layer] = np.dot(delta, h[-layer - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations and pre-activations of every layer
    activations = [x]  # list to store the activation values
    zs = []  # list to store the pre-activation (weighted-input) vectors
    val = x
    for i in range(num_layers - 1):
        val = np.dot(weights[i], val) + biases[i]
        zs.append(val)
        val = sigmoid(val)
        activations.append(val)

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).delta(activations[-1], y)

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, num_layers):
        z = zs[-l]
        delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(z)
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: a[k] holds the pre-activation of layer k + 1;
    # activations[0] is the input x and activations[k + 1] = sigmoid(a[k])
    a = [np.zeros(b.shape) for b in biases]
    h_s = [np.zeros(b.shape) for b in biases]
    activations = [x] + h_s
    for k in range(num_layers - 1):
        a[k] = biases[k] + np.dot(weights[k], activations[k])
        activations[k + 1] = sigmoid(a[k])

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    delta = (cost).delta(activations[-1], y)
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # backward pass: compute the gradient for each weight and bias
    for k in range(2, num_layers):
        delta = np.dot(weights[-k + 1].T, delta) * sigmoid_prime(a[-k])
        nabla_b[-k] = delta
        nabla_w[-k] = np.dot(delta, activations[-k - 1].T)

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: a[k] is the pre-activation of layer k (a[0] is just a
    # placeholder for the input), activations[k] is its activation
    activations = [x]
    a = [x]
    for i in range(0, num_layers - 1):
        a.append(biases[i] + np.dot(weightsT[i], activations[i]))
        activations.append(sigmoid(a[-1]))

    # gradient of the error with respect to the output activations;
    # activations[-1] holds the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: at each layer, first convert the activation gradient G
    # into a pre-activation gradient (including the output layer, since
    # df_wrt_a alone is the raw dC/da), then read off the bias and weight
    # gradients and propagate G to the layer below
    G = delta
    for k in range(num_layers - 1, 0, -1):
        G = np.multiply(G, sigmoid_prime(a[k]))
        nabla_b[k - 1] = G
        nabla_wT[k - 1] = np.dot(G, np.transpose(activations[k - 1]))
        G = np.dot(np.transpose(weightsT[k - 1]), G)

    return (nabla_b, nabla_wT)

def test_sigmoid():
    z = np.arange(-10, 10, 0.1)
    y = act.sigmoid(z)
    y_p = act.sigmoid_prime(z)

    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(z, y)
    plt.title('sigmoid')
    plt.subplot(1, 2, 2)
    plt.plot(z, y_p)
    plt.title('derivative sigmoid')
    plt.show()

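# Every implementation above assumes sigmoid and sigmoid_prime helpers are in
# scope (test_sigmoid references them through a module imported as `act`). A
# minimal sketch using the standard logistic definitions; the course's actual
# activation module may differ in detail:
import numpy as np

def sigmoid(z):
    """Logistic function sigma(z) = 1 / (1 + exp(-z)), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the logistic: sigma'(z) = sigma(z) * (1 - sigma(z))."""
    s = sigmoid(z)
    return s * (1.0 - s)
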
def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # the gradient lists are built back to front by prepending below
    nabla_b = []
    nabla_w = []

    # feedforward: store the activations and pre-activation (z) vectors of
    # every layer
    activations = [x]  # store activations
    zs = []  # store the z vectors
    for i in range(num_layers - 1):
        zi = biases[i] + np.dot(weights[i], activations[i])
        zs.append(zi)
        activations.append(sigmoid(zi))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer
    g = (cost).delta(activations[-1], y)  # output-layer delta for the cross-entropy cost
    nabla_w = [np.dot(g, np.transpose(activations[-2]))]
    nabla_b = [g]

    # backward pass: prepend each layer's gradients so the lists stay in
    # front-to-back order
    for i in range(2, num_layers):
        g = np.dot(weights[-i + 1].transpose(), g) * sigmoid_prime(zs[-i])
        nabla_b = [g] + nabla_b
        nabla_w = [np.dot(g, np.transpose(activations[-i - 1]))] + nabla_w

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations (zs) and activations of every
    # layer, whole vectors at a time, so sigmoid_prime can be evaluated on
    # the pre-activations in the backward pass
    zs = []
    activations = [x]
    for b, wT in zip(biases, weightsT):
        zs.append(np.dot(wT, activations[-1]) + b)
        activations.append(sigmoid(zs[-1]))

    # gradient of the error with respect to the output activations
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: convert to a pre-activation gradient at each layer, read
    # off the bias and weight gradients, then propagate to the layer below
    for i in range(len(biases) - 1, -1, -1):
        delta = np.multiply(delta, sigmoid_prime(zs[i]))
        nabla_b[i] = delta
        nabla_wT[i] = np.dot(delta, np.transpose(activations[i]))
        delta = np.dot(np.transpose(weightsT[i]), delta)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations (zs) and activations of every layer
    activations = [x]
    zs = []
    for k in range(0, num_layers - 1):
        zs.append(np.dot(weightsT[k], activations[k]) + biases[k])
        activations.append(sigmoid(zs[k]))

    # gradient of the error with respect to the output, converted into a
    # pre-activation gradient (df_wrt_a alone is the raw dC/da)
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(zs[-1])

    # backward pass: the output layer uses delta as-is; every earlier layer
    # first propagates delta through the next layer's weights
    for i in range(num_layers - 2, -1, -1):
        if i != num_layers - 2:
            delta = np.dot(np.transpose(weightsT[i + 1]), delta) * sigmoid_prime(
                zs[i])
        nabla_b[i] = delta
        nabla_wT[i] = np.dot(delta, np.transpose(activations[i]))

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the gradients of
        all biases and weights; nabla_b and nabla_w have the same shapes as
        the input biases and weights.
    """
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations and pre-activations of every layer
    activation = x
    activations = [x]
    before_activations = []
    for bias, weight in zip(biases, weights):
        before_activation = np.dot(weight, activation) + bias
        before_activations.append(before_activation)
        activation = sigmoid(before_activation)
        activations.append(activation)

    # output-layer delta (for CrossEntropyCost, cost.delta already folds in
    # the sigmoid derivative)
    delta = (cost).delta(activations[-1], y)

    # backward pass: l = 1 is the output layer, l = 2 the second-to-last, ...
    for l in range(1, num_layers):
        if l != 1:
            delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(
                before_activations[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

def backprop(x, y, biases, weightsT, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_wT)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weightsT (list): list of biases and transposed weights of
                the entire network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_wT): tuple containing the gradients for all the
                biases and weightsT; nabla_b and nabla_wT have the same shapes
                as the input biases and weightsT
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: pre_act[k] holds the pre-activations a^k (index 0 is a
    # placeholder) and activations[k] holds the activations h^k, with h^0 = x
    pre_act = [np.zeros(b.shape) for b in biases]
    pre_act.insert(0, np.nan)
    activations = [np.zeros(b.shape) for b in biases]
    activations.insert(0, x)
    for i in range(0, num_layers - 1):
        # use the previous layer's output as the current layer's input,
        # h^(k-1) -> a^k, then apply the sigmoid elementwise: g(a^k) -> h^k
        pre_act[i + 1] = np.matmul(weightsT[i], activations[i]) + biases[i]
        activations[i + 1] = sigmoid(pre_act[i + 1])

    # gradient of the error with respect to the output activations;
    # activations[-1] holds the activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: handle the output layer first (df_wrt_a is the raw
    # dC/dh^k, so it still needs the sigmoid derivative) ...
    G = delta * sigmoid_prime(pre_act[-1])
    nabla_b[-1] = G
    nabla_wT[-1] = np.matmul(G, np.transpose(activations[-2]))
    G = np.matmul(np.transpose(weightsT[-1]), G)

    # ... then repeat the same update for the remaining layers
    for n in reversed(range(0, num_layers - 2)):
        G = np.multiply(G, sigmoid_prime(pre_act[n + 1]))
        nabla_b[n] = G
        nabla_wT[n] = np.matmul(G, np.transpose(activations[n]))
        G = np.matmul(np.transpose(weightsT[n]), G)

    return (nabla_b, nabla_wT)

def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
        Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of all biases and weights.

        Args:
            x, y: input image x and label y
            biases, weights (list): list of biases and weights of the entire
                network
            cost (CrossEntropyCost): object of cost computation
            num_layers (int): number of layers of the network

        Returns:
            (nabla_b, nabla_w): tuple containing the gradients for all the
                biases and weights; nabla_b and nabla_w have the same shapes
                as the input biases and weights
    """
    # initialize zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the pre-activations (zs) and activations of every
    # layer so the backward pass can evaluate sigmoid_prime on them directly
    zs = []
    activations = [x]
    for b, w in zip(biases, weights):
        zs.append(np.dot(w, activations[-1]) + b)
        activations.append(sigmoid(zs[-1]))

    # gradient of the error with respect to the output; activations[-1] holds
    # the activations of the output layer (for CrossEntropyCost, cost.delta
    # already folds in the sigmoid derivative)
    delta = (cost).delta(activations[-1], y)

    # backward pass: read off the output-layer gradients, then walk back
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, num_layers):
        delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(
            zs[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)

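# A minimal finite-difference check, useful for validating any of the backprop
# variants above against a numerical gradient. This is a sketch under two
# assumptions: the network is a plain `weights` (non-transposed) variant, and
# the cost object exposes an `fn(a, y)` method that evaluates the scalar cost;
# adjust both to match the actual course code.
def grad_check_bias(x, y, biases, weights, cost, num_layers, layer, idx,
                    eps=1e-5):
    """Compare one bias gradient from backprop against a central difference.

    `idx` indexes a single entry of biases[layer], e.g. (3, 0) for the
    column-vector bias shapes used above.
    """
    def total_cost(bs):
        # forward pass with the perturbed biases, then evaluate the scalar cost
        a = x
        for b, w in zip(bs, weights):
            a = sigmoid(np.dot(w, a) + b)
        return cost.fn(a, y)  # assumed cost-function interface

    analytic, _ = backprop(x, y, biases, weights, cost, num_layers)
    bs_plus = [b.copy() for b in biases]
    bs_minus = [b.copy() for b in biases]
    bs_plus[layer][idx] += eps
    bs_minus[layer][idx] -= eps
    numeric = (total_cost(bs_plus) - total_cost(bs_minus)) / (2 * eps)
    # the two returned values should agree to several decimal places
    return analytic[layer][idx], numeric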