def test_output_relu_backward(self):
    print_test("Testing relu backward function:")
    np.random.seed(395)
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)
    dx_num = eval_numerical_gradient_array(
        lambda x: layers.relu_forward(x), x, dout)
    dx = layers.relu_backward(dout, x)
    dx_e = rel_error(dx, dx_num)
    print('dX relative difference:', dx_e)
    self.assertTrue(dx_e <= 1e-11)
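
The test above only pins down the calling convention: relu_forward(x) produces the activations and relu_backward(dout, x) takes the upstream gradient together with the original pre-activation input. A minimal sketch consistent with that convention (the real layers module may differ in details):

import numpy as np

def relu_forward(X):
    # Elementwise ReLU: negative entries become zero, the rest pass through unchanged.
    return np.maximum(0, X)

def relu_backward(dout, X):
    # The gradient flows only where the pre-activation input X was positive.
    return dout * (X > 0)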
Example #2
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:

        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        hidden_num = self.num_layers - 1
        scores = X
        cache_history = []
        L2reg = 0
        cache_history.append(scores)
        for i in range(hidden_num):
            scores = linear_forward(scores, self.params['W%d' % (i + 1)],
                                    self.params['b%d' % (i + 1)])
            # cache the pre-activation for relu_backward
            cache_history.append(scores)
            scores = relu_forward(scores)
            if self.use_dropout:
                scores, cache = dropout_forward(scores,
                                                self.dropout_params["p"],
                                                self.dropout_params["train"],
                                                self.dropout_params["seed"])
                # cache the dropout mask for dropout_backward
                cache_history.append(cache)
            # cache this layer's output: it is the input to the next linear layer
            cache_history.append(scores)
            L2reg += np.sum(self.params['W%d' % (i + 1)]**2)

        i += 1
        scores = linear_forward(scores, self.params['W%d' % (i + 1)],
                                self.params['b%d' % (i + 1)])

        L2reg += np.sum(self.params['W%d' % (i + 1)]**2)
        L2reg *= 0.5 * self.reg
        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        loss, dout = softmax(scores, y)
        loss += L2reg

        dout, grads['W%d' % (i + 1)], grads['b%d' % (i + 1)] = linear_backward(
            dout, cache_history.pop(), self.params['W%d' % (i + 1)],
            self.params['b%d' % (i + 1)])
        grads['W%d' % (i + 1)] += self.reg * self.params['W%d' % (i + 1)]

        i -= 1
        while i >= 0:
            if self.use_dropout:
                dout = dropout_backward(dout, cache_history.pop())

            dout = relu_backward(dout, cache_history.pop())
            dout, grads['W%d' %
                        (i + 1)], grads['b%d' % (i + 1)] = linear_backward(
                            dout, cache_history.pop(),
                            self.params['W%d' % (i + 1)],
                            self.params['b%d' % (i + 1)])

            grads['W%d' % (i + 1)] += self.reg * self.params['W%d' % (i + 1)]
            i -= 1

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
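
Every loss implementation in this listing calls a softmax(scores, y) helper that returns the cross-entropy loss together with its gradient with respect to the scores. A minimal sketch of that contract, assuming the loss is averaged over the minibatch (the actual helper may differ in details):

import numpy as np

def softmax(scores, y):
    # Numerically stable softmax cross-entropy, averaged over the N examples.
    shifted = scores - scores.max(axis=1, keepdims=True)
    probs = np.exp(shifted)
    probs /= probs.sum(axis=1, keepdims=True)
    N = scores.shape[0]
    loss = -np.log(probs[np.arange(N), y]).mean()
    # Gradient of the mean loss with respect to the scores.
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N
    return loss, dscores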
Example #3
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        inp = X
        for i in range(1, self.num_layers):
            W, b = self.params["W%d" % i], self.params["b%d" % i]
            linOut = linear_forward(inp, W, b)
            linear_cache["O%d" % i] = inp
            reluOut = relu_forward(linOut)
            relu_cache["O%d" % i] = linOut
            inp = reluOut
            if self.use_dropout:
                dropOut, dropMask = dropout_forward(reluOut,
                                                    **self.dropout_params)
                dropout_cache["O%d" % i] = dropMask
                inp = dropOut
        W, b = self.params["W%d" % (i + 1)], self.params["b%d" % (i + 1)]
        scores = linear_forward(inp, W, b)

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        loss, dout = softmax(scores, y)
        for k in range(1, self.num_layers + 1):
            loss += 0.5 * self.reg * (self.params["W%d" % k]**2).sum()

        dX, dW, db = linear_backward(dout, inp, W, b)
        grads["W%d" % (i + 1)], grads["b%d" % (i + 1)] = dW + self.reg * W, db

        for i in range(self.num_layers - 1, 0, -1):
            if self.use_dropout:
                dX = dropout_backward(dX, dropout_cache["O%d" % i],
                                      **self.dropout_params)
            reluBack = relu_backward(dX, relu_cache["O%d" % i])
            W, b = self.params["W%d" % i], self.params["b%d" % i]
            dX, dW, db = linear_backward(reluBack, linear_cache["O%d" % i], W,
                                         b)
            grads["W%d" % i], grads["b%d" % i] = dW + self.reg * W, db

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
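
The examples also assume linear_forward(X, W, b) computes an affine map and linear_backward(dout, X, W, b) returns (dX, dW, db), where X is the cached layer input of shape (N, d_1, ..., d_k). A sketch matching that calling convention (the real helpers may differ in details):

import numpy as np

def linear_forward(X, W, b):
    # Flatten each example to a row vector, then apply the affine map X @ W + b.
    return X.reshape(X.shape[0], -1) @ W + b

def linear_backward(dout, X, W, b):
    # dout has shape (N, M); X is the input cached during the forward pass.
    X_flat = X.reshape(X.shape[0], -1)
    dX = (dout @ W.T).reshape(X.shape)  # reshape back to the original input shape
    dW = X_flat.T @ dout
    db = dout.sum(axis=0)
    return dX, dW, db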
Example #4
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
        parameter names to gradients of the loss with respect to those parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        x_in = X
        # If y is None then we are in test mode so just return scores
        if y is None:
            for i in range(1, self.num_layers):
                linear_out = linear_forward(x_in, self.params['W' + str(i)], self.params['b' + str(i)])
                relu_out = relu_forward(linear_out)
                x_in = relu_out
            logits = linear_forward(x_in, self.params['W' + str(self.num_layers)],
                                    self.params['b' + str(self.num_layers)])
            # in test mode, return the class scores from the final linear layer
            scores = logits
            return scores
        else:
            self.last_grads_ = copy.deepcopy(self.grads_)
            loss, grads = 0, dict()
            self.grads_ = grads
            for i in range(1, self.num_layers):
                linear_out = linear_forward(x_in, self.params['W' + str(i)], self.params['b' + str(i)])
                linear_cache["out" + str(i)] = linear_out
                relu_out = relu_forward(linear_out)
                relu_cache["out" + str(i)] = relu_out
                dropout_out = relu_out
                if self.use_dropout:
                    seed = 42 if self.dropout_params["seed"] is None else self.dropout_params["seed"]
                    dropout_out, mask = dropout_forward(relu_out, self.dropout_params["p"], True, seed)
                    dropout_cache["out" + str(i)] = dropout_out
                    dropout_cache["mask" + str(i)] = mask
                x_in = dropout_out
            logits = linear_forward(x_in, self.params['W' + str(self.num_layers)],
                                    self.params['b' + str(self.num_layers)])
            loss, dlogits = softmax(logits, y)

            #######################################################################
            #                            END OF YOUR CODE                         #
            #######################################################################
            # """
            # TODO: Implement the backward pass for the fully-connected net. Store
            # the loss in the loss variable and all gradients in the grads
            # dictionary. Compute the loss with softmax. grads[k] has the gradients
            # for self.params[k]. Add L2 regularisation to the loss function.
            # NOTE: To ensure that your implementation matches ours and you pass the
            # automated tests, make sure that your L2 regularization includes a
            # factor of 0.5 to simplify the expression for the gradient.
            # """
            #######################################################################
            #                           BEGIN OF YOUR CODE                        #
            #######################################################################

            dout = dlogits
            if self.use_dropout:
                input_x = dropout_cache["out" + str(self.num_layers - 1)]
            else:
                input_x = relu_cache["out" + str(self.num_layers - 1)]
            W = self.params['W' + str(self.num_layers)]
            b = self.params['b' + str(self.num_layers)]
            dout, dW, db = linear_backward(dout, input_x, W, b)
            grads['W' + str(self.num_layers)] = dW
            grads['b' + str(self.num_layers)] = db
            for i in range(self.num_layers - 1, 1, -1):
                if self.use_dropout:
                    mask = dropout_cache['mask' + str(i)]
                    dout = dropout_backward(dout, mask,
                                            self.dropout_params["p"], True)
                input_x = linear_cache["out" + str(i)]
                dout = relu_backward(dout, input_x)
                if self.use_dropout:
                    input_x = dropout_cache["out" + str(i - 1)]
                else:
                    input_x = relu_cache["out" + str(i - 1)]
                W = self.params['W' + str(i)]
                b = self.params['b' + str(i)]
                dout, dW, db = linear_backward(dout, input_x, W, b)
                grads['W' + str(i)] = dW
                grads['b' + str(i)] = db
            if self.use_dropout:
                mask = dropout_cache['mask' + str(1)]
                dout = dropout_backward(dout, mask,
                                        self.dropout_params["p"], True)
            input_x = linear_cache["out" + str(1)]
            dout = relu_backward(dout, input_x)
            W = self.params['W' + str(1)]
            b = self.params['b' + str(1)]
            dout, dW, db = linear_backward(dout, X, W, b)
            grads['W' + str(1)] = dW
            grads['b' + str(1)] = db

            # add L2 regularisation
            regularisation = 0.0
            for i in range(1, self.num_layers + 1):
                tempW = 0.5 * self.reg * np.square(self.params['W' + str(i)])
                regularisation += np.sum(tempW)
                grads['W' + str(i)] += self.reg * self.params['W' + str(i)]
            loss += regularisation
            #######################################################################
            #                            END OF YOUR CODE                         #
            #######################################################################
            return loss, grads
Example #5
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i]

        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """

        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """

        if self.use_dropout:
            self.dropout_params["train"] = False if y is None else True

        Xi = linear_cache['0'] = X
        if self.use_dropout:
            p, t = self.dropout_params["p"], self.dropout_params["train"]
        for i in range(self.num_layers):
            W, b = self.params['W' + str(i + 1)], self.params['b' + str(i + 1)]
            Xi = relu_cache[str(i)] = linear_forward(Xi, W, b)
            if i != self.num_layers - 1:
                Xi = linear_cache[str(i + 1)] = relu_forward(Xi)
                if self.use_dropout:
                    # receive (out, mask)
                    Xi, dropout_cache[str(i)] = dropout_forward(Xi, p, t, None)
        scores = Xi

        # If y is None then we are in test mode, so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """

        loss, dX = softmax(scores, y)
        for i in reversed(range(self.num_layers)):
            if i != self.num_layers - 1:
                if self.use_dropout:
                    dX = dropout_backward(dX, dropout_cache[str(i)],
                                          p=p, train=t)
                dX = relu_backward(dX, relu_cache[str(i)])
            W, b = self.params['W' + str(i + 1)], self.params['b' + str(i + 1)]
            dX, dW, db = linear_backward(dX, linear_cache[str(i)], W, b)
            grads['W' + str(i + 1)] = dW + self.reg * self.params['W' + str(i + 1)]
            grads['b' + str(i + 1)] = db
            loss += 0.5 * self.reg * np.sum(W**2)

        return loss, grads
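
The dropout calls in these examples assume dropout_forward(X, p, train, seed) returns an (out, mask) pair and dropout_backward(dout, mask, p, train) reapplies the mask to the upstream gradient. A sketch under the assumption that p is the keep probability and the layer uses inverted dropout (the actual helpers may interpret p as the drop probability instead):

import numpy as np

def dropout_forward(X, p, train, seed=None):
    # Inverted dropout: scale at train time so no rescaling is needed at test time.
    if not train:
        return X, None
    if seed is not None:
        np.random.seed(seed)
    mask = (np.random.rand(*X.shape) < p) / p
    return X * mask, mask

def dropout_backward(dout, mask, p, train):
    # At train time the gradient is masked and scaled exactly like the activations.
    return dout * mask if train else dout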
Example #6
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """

        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """

        # get num of hidden layers (do not count the output layer)
        num_hidden_layers = self.num_layers - 1

        # store the first layer's input in the linear cache
        linear_cache[1] = X

        # iterate through all layers, including the class layer
        for i in range(1, self.num_layers + 1):

            # compute this layer's W and b keys
            Wkey, bkey = "W" + str(i), "b" + str(i)

            # perform linear pass: the output has shape (N, M); store it in relu_cache
            relu_cache[i] = linear_forward(linear_cache[i], self.params[Wkey],
                                           self.params[bkey])

            # perform relu -> dropout
            if i < self.num_layers:

                # perform ReLU
                relu_out = relu_forward(relu_cache[i])

                # perform dropout
                out, mask = dropout_forward(relu_out,                         \
                                            self.dropout_params["p"],         \
                                            self.dropout_params["train"],     \
                                            self.dropout_params["seed"])

                # add the mask to the dropout cache
                dropout_cache[i] = mask

                # cache this layer's output as input to the next layer in the linear cache
                linear_cache[i + 1] = out

        # final layer output is stored in the relu_cache since it did not go through dropout or ReLU
        scores = relu_cache[self.num_layers]

        # if y is None then we are in test mode so just return scores
        if y is None:
            return scores
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """

        # used to store gradients
        grads = dict()

        # compute the softmax loss and the gradient of the loss w.r.t. the scores
        loss, dlogits = softmax(scores, y)

        # used to store the upstream derivate of the next layer
        dout = dlogits

        for i in range(self.num_layers, 0, -1):

            # compute this layer's W and b names
            Wkey, bkey = "W" + str(i), "b" + str(i)

            # add L2 regularisation. square each weight of this layer and add it to loss
            loss += 0.5 * self.reg * np.sum(self.params[Wkey]**2)

            if i < self.num_layers:

                # perform dropout
                dout = dropout_backward(dout, dropout_cache[i],
                                        self.dropout_params["p"],
                                        self.dropout_params["train"])

                # perform ReLU
                dout = relu_backward(dout, relu_cache[i])

            # perform linear backward and store the gradients
            dX, dW, db = linear_backward(dout, linear_cache[i],
                                         self.params[Wkey], self.params[bkey])

            # d(E_0 + 0.5 * reg * sum(W^2)) / dW_i = dE_0/dW_i + reg * W_i
            # dW holds dE_0/dW_i, so we must add the reg * W_i term ourselves
            grads.update({Wkey: dW + self.reg * self.params[Wkey], bkey: db})

            # set dout equal to dX
            dout = dX

        return loss, grads
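
The NOTE repeated in every docstring asks for a 0.5 factor on the L2 term because the gradient of 0.5 * reg * sum(W**2) with respect to W is exactly reg * W, which is the term each backward pass above adds to dW. A quick numerical check of that identity on a single weight entry:

import numpy as np

np.random.seed(0)
W = np.random.randn(4, 3)
reg, h = 0.1, 1e-6
reg_loss = lambda M: 0.5 * reg * np.sum(M ** 2)

W_plus, W_minus = W.copy(), W.copy()
W_plus[1, 2] += h
W_minus[1, 2] -= h
numeric = (reg_loss(W_plus) - reg_loss(W_minus)) / (2 * h)
print(numeric, reg * W[1, 2])  # the two values agree up to floating-point error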
Example #7
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        out = X.copy()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        train = y is not None
        scores = out

        for i in range(1, self.num_layers):

            linear_cache[str(i)] = scores

            scores = linear_forward(X=scores,
                                    W=self.params['W' + str(i)],
                                    b=self.params['b' + str(i)])

            relu_cache[str(i)] = scores

            scores = relu_forward(scores)

            if self.use_dropout:

                if "seed" in self.dropout_params:  #mmmmmmmmodified

                    scores, mask = dropout_forward(
                        scores,
                        p=self.dropout_params["p"],
                        train=train,
                        seed=self.dropout_params["seed"])
                else:
                    scores, mask = dropout_forward(scores,
                                                   p=self.dropout_params["p"],
                                                   train=train)

                dropout_cache[str(i)] = mask

        linear_cache[str(self.num_layers)] = scores

        scores = linear_forward(X=scores,
                                W=self.params['W' + str(self.num_layers)],
                                b=self.params['b' + str(self.num_layers)])

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        loss, dscore = softmax(scores, y)

        regularization = 0
        #add regularization to loss
        for i in range(1, self.num_layers + 1):

            regularization += np.sum(self.params['W' + str(i)]**2)

        loss += 0.5 * self.reg * regularization

        #backward through last w and b
        dhidden_layer, grads['W' + str(self.num_layers)], grads['b' + str(self.num_layers)] = \
            linear_backward(dscore, linear_cache[str(self.num_layers)],
                            self.params['W' + str(self.num_layers)],
                            self.params['b' + str(self.num_layers)])

        #add regularization to W
        grads['W' + str(self.num_layers)] += self.reg * self.params[
            'W' + str(self.num_layers)]

        #backward from all other layers
        for i in range(self.num_layers - 1, 0, -1):

            if self.use_dropout:

                dhidden_layer = dropout_backward(dhidden_layer,
                                                 dropout_cache[str(i)],
                                                 self.dropout_params["p"],
                                                 self.dropout_params["train"])

            dhidden_layer = relu_backward(dhidden_layer, relu_cache[str(i)])

            dhidden_layer, grads['W' + str(i)], grads['b' + str(i)] = \
                linear_backward(dhidden_layer, linear_cache[str(i)],
                                self.params['W' + str(i)],
                                self.params['b' + str(i)])

            grads['W' + str(i)] += self.reg * self.params['W' + str(i)]

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
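
A loss method like the ones in this listing is usually validated by comparing its analytic gradients against central differences. A sketch of such a check; model here is a hypothetical instance of any of the classes above (anything exposing params and loss(X, y)), and rel_error is the helper used in the test at the top of this section:

import numpy as np

def numerical_grad_of_loss(model, X, y, key, h=1e-5):
    # Central-difference gradient of the scalar loss w.r.t. model.params[key].
    grad = np.zeros_like(model.params[key])
    it = np.nditer(model.params[key], flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = model.params[key][idx]
        model.params[key][idx] = old + h
        loss_plus, _ = model.loss(X, y)
        model.params[key][idx] = old - h
        loss_minus, _ = model.loss(X, y)
        model.params[key][idx] = old
        grad[idx] = (loss_plus - loss_minus) / (2 * h)
        it.iternext()
    return grad

# Usage (hypothetical model instance):
# loss, grads = model.loss(X, y)
# for key in grads:
#     print(key, rel_error(grads[key], numerical_grad_of_loss(model, X, y, key)))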
Example #8
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        linear_cache[0] = relu_cache[0] = X
        # store the input as an (output, mask) pair so that indexing
        # dropout_cache[0][0] below gives back X rather than its first row
        dropout_cache[0] = (X, None)
        for i in range(1, self.num_layers + 1):
            curr_id = str(i)
            W_id = 'W' + curr_id
            b_id = 'b' + curr_id

            # if current layer is not the output layer
            if i < self.num_layers:

                # linear (affine) layer
                if not self.use_dropout:
                    prev_layer_output = relu_cache[i - 1]
                else:
                    # dropout_forward returns (out, mask); take the output
                    prev_layer_output = dropout_cache[i - 1][0]

                linear_cache[i] = linear_forward(prev_layer_output,
                                                 self.params[W_id],
                                                 self.params[b_id])

                # relu activation
                relu_cache[i] = relu_forward(linear_cache[i])

                # dropout regularization
                if self.use_dropout:
                    dropout_cache[i] = dropout_forward(
                        relu_cache[i], self.dropout_params['p'],
                        self.dropout_params['train'],
                        self.dropout_params['seed'])

            # if current layer is the output layer
            else:

                # output layer outputs the estimate result of nn, scores
                if not self.use_dropout:
                    prev_layer_output = relu_cache[i - 1]
                else:
                    prev_layer_output = dropout_cache[i - 1][0]

                scores = linear_forward(prev_layer_output, self.params[W_id],
                                        self.params[b_id])

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        # use softmax to produce intermediate results, loss and dlogits
        loss, dlogits = softmax(scores, y)

        # iterate backward, output layer to input layer via hidden layers
        for i in range(self.num_layers, 0, -1):

            # set variable names
            curr_id = str(i)
            W_id = 'W' + curr_id
            b_id = 'b' + curr_id

            # L2 regularization
            loss += 0.5 * self.reg * np.sum(self.params[W_id]**2)

            # retrieve the result of upper layer from cache, prev_layer_output
            prev_layer_output = relu_cache[
                i - 1] if not self.use_dropout else dropout_cache[i - 1][0]

            # if current layer is the output layer
            if i == self.num_layers:

                # perform the linear backward pass to compute grads for W and b
                dX, grads[W_id], grads[b_id] = linear_backward(
                    dlogits, prev_layer_output, self.params[W_id],
                    self.params[b_id])

            # if current layer is not the output layer
            else:

                # dropout
                if self.use_dropout:
                    mask = dropout_cache[i][1]
                    p, train = self.dropout_params['p'], self.dropout_params[
                        'train']

                    dX = dropout_backward(dX, mask, p, train)

                # relu
                dX = relu_backward(dX, linear_cache[i])

                # linear
                dX, grads[W_id], grads[b_id] = linear_backward(
                    dX, prev_layer_output, self.params[W_id],
                    self.params[b_id])

            #
            grads[W_id] += self.reg * self.params[W_id]

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
Example #9
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        train = y is not None

        if self.use_dropout:
            p = self.dropout_params["p"]
            seed = self.dropout_params["seed"]

        for layer in range(1, self.num_layers + 1):
            W = self.params["W" + str(layer)]
            b = self.params["b" + str(layer)]
            input = X if layer == 1 else (
                dropout_cache[layer - 1][0] if self.use_dropout
                else relu_cache[layer - 1])
            if layer != self.num_layers:
                linear_cache[layer] = linear_forward(input, W, b)
                relu_cache[layer] = relu_forward(linear_cache[layer])
                if self.use_dropout:
                    dropout_cache[layer] = dropout_forward(
                        relu_cache[layer], p, train, seed)
            else:
                scores = linear_forward(input, W, b)

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if not train:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        loss, dscores = softmax(scores, y)
        loss += self.l2regular()

        for layer in reversed(range(1, self.num_layers + 1)):
            w_key = "W" + str(layer)
            b_key = "b" + str(layer)
            W = self.params[w_key]
            b = self.params[b_key]
            if layer == self.num_layers:
                input_linear = (dropout_cache[layer - 1][0]
                                if self.use_dropout else relu_cache[layer - 1])
                dX, dW, db = linear_backward(dscores, input_linear, W, b)
                dW += self.reg * W
            else:
                if self.use_dropout:
                    mask = dropout_cache[layer][1]
                    dX = dropout_backward(dX, mask, p, train)
                input_relu = linear_cache[layer]
                dX = relu_backward(dX, input_relu)
                if layer == 1:
                    input_linear = X
                else:
                    input_linear = (dropout_cache[layer - 1][0]
                                    if self.use_dropout
                                    else relu_cache[layer - 1])
                dout = dX
                dX, dW, db = linear_backward(dout, input_linear, W, b)
                dW += self.reg * W
            grads[w_key] = dW
            grads[b_key] = db

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
Example #10
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        # Forward pass: loop through all layers
        # store the linear activations, relu and masks
        for i in range(1, self.num_layers):
            if i == 1:
                linear_cache["L{}".format(i)] = linear_forward(
                    X, self.params["W{}".format(i)],
                    self.params["b{}".format(i)])
            else:
                if self.use_dropout:
                    linear_cache["L{}".format(i)] = linear_forward(
                        dropout_cache["D{}".format(i - 1)],
                        self.params["W{}".format(i)],
                        self.params["b{}".format(i)])
                else:
                    linear_cache["L{}".format(i)] = linear_forward(
                        relu_cache["R{}".format(i - 1)],
                        self.params["W{}".format(i)],
                        self.params["b{}".format(i)])

            relu_cache["R{}".format(i)] = relu_forward(
                linear_cache["L{}".format(i)])

            if self.use_dropout:
                s = None
                if 'seed' in self.dropout_params.keys():
                    s = self.dropout_params['seed']
                p = self.dropout_params["p"]
                t = self.dropout_params["train"]
                dropout_cache["D{}".format(i)], dropout_cache["M{}".format(
                    i)] = dropout_forward(relu_cache["R{}".format(i)],
                                          p=p,
                                          train=t,
                                          seed=s)

        # Final linear layer
        if self.use_dropout:
            scores = linear_forward(
                dropout_cache["D{}".format(self.num_layers - 1)],
                self.params["W{}".format(self.num_layers)],
                self.params["b{}".format(self.num_layers)])
        else:
            scores = linear_forward(
                relu_cache["R{}".format(self.num_layers - 1)],
                self.params["W{}".format(self.num_layers)],
                self.params["b{}".format(self.num_layers)])

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        # Compute the loss and gradients using softmax
        loss, dx = softmax(scores, y)

        # Apply L2 Regularisation
        for i in range(1, self.num_layers + 1):
            loss += (0.5 * self.reg) * np.sum(
                np.square(self.params["W{}".format(i)]))

        # Backwards pass: Final linear layer
        if self.use_dropout:
            dx, dW, db = linear_backward(
                dx, dropout_cache["D{}".format(self.num_layers - 1)],
                self.params["W{}".format(self.num_layers)],
                self.params["b{}".format(self.num_layers)])
        else:
            dx, dW, db = linear_backward(
                dx, relu_cache["R{}".format(self.num_layers - 1)],
                self.params["W{}".format(self.num_layers)],
                self.params["b{}".format(self.num_layers)])

        grads["W{}".format(self.num_layers)] = dW
        # L2 Regularisation
        grads["W{}".format(
            self.num_layers)] += self.reg * self.params["W{}".format(
                self.num_layers)]
        grads["b{}".format(self.num_layers)] = db

        # Loop backwards through the layers to the first layer
        for j in reversed(range(1, self.num_layers)):
            # Reverse dropout
            if self.use_dropout:
                dx = dropout_backward(dx,
                                      mask=dropout_cache["M{}".format(j)],
                                      p=self.dropout_params["p"],
                                      train=self.dropout_params["train"])

            # Relu backward pass with incoming Z value
            dx = relu_backward(dx, linear_cache["L{}".format(j)])

            # Linear pass with activation value of incoming layer
            if j == 1:
                dx, dW, db = linear_backward(dx, X,
                                             self.params["W{}".format(j)],
                                             self.params["b{}".format(j)])
            else:
                if self.use_dropout:
                    dx, dW, db = linear_backward(
                        dx, dropout_cache["D{}".format(j - 1)],
                        self.params["W{}".format(j)],
                        self.params["b{}".format(j)])
                else:
                    dx, dW, db = linear_backward(
                        dx, relu_cache["R{}".format(j - 1)],
                        self.params["W{}".format(j)],
                        self.params["b{}".format(j)])

            grads["W{}".format(j)] = dW
            # Regularisation
            grads["W{}".format(j)] += self.reg * self.params["W{}".format(j)]
            grads["b{}".format(j)] = db
        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
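
All of the implementations share the same external contract from the docstring: loss(X) returns raw class scores at test time, while loss(X, y) returns the scalar loss and one gradient per parameter. A hypothetical usage sketch, with model standing in for an instance of any of the classes above:

import numpy as np

def demo(model, N=5, D=20, C=10):
    # model is a hypothetical instance exposing params and loss(X, y).
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    # Test-time forward pass: only the class scores come back.
    scores = model.loss(X)              # shape (N, C)
    predictions = scores.argmax(axis=1)

    # Training-time pass: scalar loss plus one gradient per parameter.
    loss, grads = model.loss(X, y)
    assert set(grads) == set(model.params)
    return predictions, loss, grads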