Example #1
    def backward(self, dy):
        """
        Performs the backward pass of the model.

        :param dy: N x 1 array. The gradient wrt the output of the network.
        :return: Gradients of the loss wrt the model parameters
        """

        # Note that the last layer has no activation
        cache_affine = self.cache['affine' + str(self.num_layer)]
        dh, dW, db = affine_backward(dy, cache_affine)
        self.grads['W' + str(self.num_layer)] = \
            dW + 2 * self.reg_strength * self.params['W' + str(self.num_layer)]
        self.grads['b' + str(self.num_layer)] = db

        # The remaining sandwich (affine + activation) layers
        for i in range(self.num_layer - 2, -1, -1):
            # Unpack cache
            cache_sigmoid = self.cache['sigmoid' + str(i + 1)]
            cache_affine = self.cache['affine' + str(i + 1)]

            # Activation backward
            dh = self.activation.backward(dh, cache_sigmoid)

            # Affine backward
            dh, dW, db = affine_backward(dh, cache_affine)

            # Refresh the gradients
            self.grads['W' + str(i + 1)] = dW + 2 * self.reg_strength * \
                                           self.params['W' + str(i + 1)]
            self.grads['b' + str(i + 1)] = db

        return self.grads
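
All five examples delegate the per-layer work to an affine_backward helper that is not shown in these listings. A minimal sketch of what such a helper typically computes, assuming the forward pass cached the tuple (x, W, b), is given below; the cache layout and the NumPy import are assumptions rather than details taken from the examples.

import numpy as np

def affine_backward(dout, cache):
    """Backward pass of an affine layer out = x.dot(W) + b.

    Assumes cache holds (x, W, b) from the forward pass and dout has shape N x M.
    """
    x, W, b = cache
    dx = dout.dot(W.T)          # gradient wrt the layer input, shape N x D
    dW = x.T.dot(dout)          # gradient wrt the weights, shape D x M
    db = np.sum(dout, axis=0)   # gradient wrt the bias, shape M
    return dx, dW, db
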
Example #2
    def backward(self, dy):
        ########################################################################
        # TODO:  Your backward here                                            #
        ########################################################################
        cache_affine = self.cache['affine' + str(self.num_layer)]
        dh, dW, db = affine_backward(dy, cache_affine)
        self.grads['W' + str(self.num_layer)] = dW + 2 * self.reg_strength * self.params['W' + str(self.num_layer)]
        self.grads['b' + str(self.num_layer)] = db

        # The remaining sandwich (affine + activation) layers, last to first
        for i in range(self.num_layer - 2, -1, -1):
            # Unpack cache (a 'Tanh' or 'LeakyRelu' model would use those keys instead)
            cache_Relu = self.cache['Relu' + str(i + 1)]
            cache_affine = self.cache['affine' + str(i + 1)]

            # Activation backward
            dh = self.activation.backward(dh, cache_Relu)

            # Affine backward
            dh, dW, db = affine_backward(dh, cache_affine)

            # Refresh the gradients, including the L2 regularization term
            self.grads['W' + str(i + 1)] = dW + 2 * self.reg_strength * self.params['W' + str(i + 1)]
            self.grads['b' + str(i + 1)] = db

        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return self.grads
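
Example #2 hard-codes the ReLU cache keys, so self.activation here is a ReLU object whose backward masks the incoming gradient with the sign of the cached input. A minimal sketch under that assumption (the class name and the choice to cache the layer input are not taken from the listing):

import numpy as np

class Relu:
    def forward(self, x):
        out = np.maximum(0, x)
        cache = x                  # keep the input; its sign masks the gradient later
        return out, cache

    def backward(self, dout, cache):
        x = cache
        dx = dout * (x > 0)        # gradient flows only where the input was positive
        return dx
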
Example #3
    def backward(self, dy):
        ########################################################################
        # TODO:  Your backward here                                            #
        ########################################################################

        # Note that the last layer has no activation
        cache_affine = self.cache['affine' + str(self.num_layer)]
        dh, dW, db = affine_backward(dy, cache_affine)
        self.grads['W' + str(self.num_layer)] = \
            dW + 2 * self.reg_strength * self.params['W' + str(self.num_layer)]
        self.grads['b' + str(self.num_layer)] = db

        # The remaining sandwich (affine + activation) layers
        for i in range(self.num_layer - 2, -1, -1):
            # Unpack cache
            cache_activation = self.cache[self.str_activation + str(i + 1)]
            cache_affine = self.cache['affine' + str(i + 1)]

            # Activation backward
            dh = self.activation.backward(dh, cache_activation)

            # Affine backward
            dh, dW, db = affine_backward(dh, cache_affine)

            # Refresh the gradients
            self.grads['W' + str(i + 1)] = dW + 2 * self.reg_strength * \
                                           self.params['W' + str(i + 1)]
            self.grads['b' + str(i + 1)] = db

        return self.grads
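
Example #3 reads the activation cache through self.str_activation, so the same backward loop serves any activation as long as the forward pass files its caches under matching keys. A hypothetical forward pass that would produce exactly the keys consumed above (affine_forward and the (out, cache) return convention are assumptions):

    def forward(self, X):
        h = X
        # Hidden layers: affine followed by the chosen activation
        for i in range(1, self.num_layer):
            h, self.cache['affine' + str(i)] = affine_forward(
                h, self.params['W' + str(i)], self.params['b' + str(i)])
            h, self.cache[self.str_activation + str(i)] = self.activation.forward(h)
        # Output layer: affine only, no activation
        y, self.cache['affine' + str(self.num_layer)] = affine_forward(
            h, self.params['W' + str(self.num_layer)],
            self.params['b' + str(self.num_layer)])
        return y
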
Example #4
    def backward(self, dy):
        """
        Performs the backward pass of the model.

        :param dy: N x 1 array. The gradient wrt the output of the network.
        :return: Gradients of the loss wrt the model parameters
        """

        # Unpack cache
        cache_affine1 = self.cache['affine1']
        cache_sigmoid = self.cache['sigmoid']
        cache_affine2 = self.cache['affine2']

        dW1 = None
        db1 = None
        dW2 = None
        db2 = None

        ########################################################################
        # TODO                                                                 #
        # Implement the backward pass using the layers you implemented.        #
        # Like the forward pass, it consists of 3 steps:                       #
        #   1. Backward the second affine layer                                #
        #   2. Backward the sigmoid layer                                      #
        #   3. Backward the first affine layer                                 #
        # You should now have the gradients wrt all model parameters           #
        ########################################################################

        # Backward second layer
        dh_, dW2, db2 = affine_backward(dy, cache_affine2)

        # Backward the sigmoid layer
        dh = sigmoid_backward(dh_, cache_sigmoid)

        # Backward first layer
        dx, dW1, db1 = affine_backward(dh, cache_affine1)

        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

        self.grads['W1'] = dW1
        self.grads['b1'] = db1
        self.grads['W2'] = dW2
        self.grads['b2'] = db2

        # Estimate the operation count and the memory usage of the backward pass
        # (sys is assumed to be imported at module level for getsizeof)
        batch_size = dy.shape[0]
        self.num_operation = 2 * batch_size * self.input_size * self.hidden_size + \
            batch_size * self.hidden_size + 2 * batch_size * self.hidden_size * 1
        self.memory_backward = sys.getsizeof(dW1) + sys.getsizeof(db1) + \
            sys.getsizeof(dW2) + sys.getsizeof(db2)
        self.memory = self.memory_forward + self.memory_backward

        return self.grads
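
Example #4 is the fixed two-layer variant and calls sigmoid_backward directly instead of going through an activation object. A minimal sketch, assuming the forward pass cached the sigmoid output s = 1 / (1 + exp(-x)):

def sigmoid_backward(dout, cache):
    """Backward pass of the sigmoid; assumes cache is the forward output s."""
    s = cache
    return dout * s * (1 - s)   # d(sigmoid)/dx = s * (1 - s)
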
Example #5
    def backward(self, dy):
        ########################################################################
        # TODO:  Your backward here                                            #
        ########################################################################
        cache_affine = self.cache['affine' + str(self.num_layer)]
        dh, dW, db = affine_backward(dy, cache_affine)
        self.grads['W' + str(self.num_layer)] = \
            dW + 2 * self.reg_strength * self.params['W' + str(self.num_layer)]
        self.grads['b' + str(self.num_layer)] = db

        # The remaining sandwich (affine + activation) layers
        for i in range(self.num_layer - 2, -1, -1):
            # Unpack cache
            cache_sigmoid = self.cache['sigmoid' + str(i + 1)]
            cache_affine = self.cache['affine' + str(i + 1)]

            # Activation backward
            dh = self.activation.backward(dh, cache_sigmoid)

            # Affine backward
            dh, dW, db = affine_backward(dh, cache_affine)

            # Refresh the gradients
            self.grads['W' + str(i + 1)] = dW + 2 * self.reg_strength * \
                                           self.params['W' + str(i + 1)]
            self.grads['b' + str(i + 1)] = db

        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return self.grads
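
Because every variant stores its results under the same keys as self.params ('W1', 'b1', ...), the returned dictionary plugs straight into a parameter update. A hypothetical training-step usage (model, dy, and learning_rate are placeholder names, not taken from the listings):

grads = model.backward(dy)                             # dy: N x 1 gradient wrt the network output
for name in grads:
    model.params[name] -= learning_rate * grads[name]  # plain SGD step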