Example #1
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        hidden_out, cache1 = affine_relu_forward(X, W1, b1)
        scores, cache2 = affine_forward(hidden_out, W2, b2)

        if y is None:
            return scores

        grads = {}
        loss, dScore = softmax_loss(scores, y)
        loss += .5 * self.reg * (np.sum(W1**2) + np.sum(W2**2))
        dX2, grads['W2'], grads['b2'] = affine_backward(dScore, cache2)
        dX, grads['W1'], grads['b1'] = affine_relu_backward(dX2, cache1)
        grads['W2'] += self.reg * W2
        grads['W1'] += self.reg * W1
        return loss, grads
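# Usage sketch (not part of the original example): assuming a model object that
# exposes `params` and the `loss` method above, plus a minibatch `X_batch`,
# `y_batch` (all placeholder names), one vanilla SGD step looks like this.
learning_rate = 1e-3
loss, grads = model.loss(X_batch, y_batch)              # training-time pass
for name in model.params:
    model.params[name] -= learning_rate * grads[name]   # SGD update
scores = model.loss(X_batch)                            # test-time pass
y_pred = np.argmax(scores, axis=1)                      # predicted classes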
    def loss(self, x, y=None):
        """
            Loss function used is MSE loss
        """
        scores = None
        scores, cache1 = affine_relu_forward(x, self.params['W1'], self.params['b1'])
        scores, cache2 = affine_relu_forward(scores, self.params['W2'], self.params['b2'])
        scores, cache3 = affine_relu_forward(scores, self.params['W3'], self.params['b3'])
        scores, cache4 = affine_forward(scores, self.params['W4'], self.params['b4'])

        if y is None:
            return scores

        loss = mse_loss_forward(scores, y)

        grads = {}
        dup = mse_loss_backward(scores, y)
        dup, grads['W4'], grads['b4'] = affine_backward(dup, cache4)
        dup, grads['W3'], grads['b3'] = affine_relu_backward(dup, cache3)
        dup, grads['W2'], grads['b2'] = affine_relu_backward(dup, cache2)
        dup, grads['W1'], grads['b1'] = affine_relu_backward(dup, cache1)

        return loss, grads
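# `mse_loss_forward` / `mse_loss_backward` are not standard layer functions, so
# the convention below is an assumption; a minimal pair consistent with the
# calls above (NumPy as `np`, as elsewhere on this page) could be:
def mse_loss_forward(scores, y):
    # 0.5 * mean squared error over all elements of the batch
    return 0.5 * np.mean((scores - y) ** 2)

def mse_loss_backward(scores, y):
    # gradient of the loss above with respect to `scores`
    return (scores - y) / scores.size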
    def train_loss(X, y, W1, W2, b1, b2):
      l1, cache1 = affine_relu_forward(X, W1, b1)
      l2, cache2 = affine_forward(l1, W2, b2)
      scores = l2

      if y is None:
        return scores

      #[TODO]: softmax is not supported yet
      # loss, d_scores = softmax_loss(scores, y)
      loss, d_scores = svm_loss(scores, y)
      loss_with_reg = loss + np.sum(W1 ** 2) * 0.5 * self.reg + np.sum(W2 ** 2) * 0.5 * self.reg

      return loss_with_reg
Example #4
        def train_loss(X, y, W1, W2, b1, b2):
            l1, cache1 = affine_relu_forward(X, W1, b1)
            l2, cache2 = affine_forward(l1, W2, b2)
            scores = l2

            if y is None:
                return scores

            #[TODO]: softmax is not supported yet
            # loss, d_scores = softmax_loss(scores, y)
            loss, d_scores = svm_loss(scores, y)
            loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
                W2**2) * 0.5 * self.reg

            return loss_with_reg
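        # The TODO above stops at the regularized loss. A sketch (not the
        # repository's code) of the matching backward pass, reusing the
        # (loss, d_scores) interface and the caches captured in the forward
        # pass; `train_grads` is a hypothetical helper name.
        def train_grads(d_scores, cache1, cache2, W1, W2, reg):
            dh, dW2, db2 = affine_backward(d_scores, cache2)
            dX, dW1, db1 = affine_relu_backward(dh, cache1)
            dW1 += reg * W1   # gradient of the 0.5 * reg * sum(W**2) terms
            dW2 += reg * W2
            return {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2}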
#   There are some common patterns of layers that are frequently used in          #
#   neural nets. For example, affine layers are frequently followed by a          #
#   ReLU nonlinearity. To make these common patterns easy, we define              #
#   several convenience layers in the file layer_utils.py.  For now               #
#   take a look at the affine_relu_forward and affine_relu_backward               #
#   functions, and run the following to numerically gradient check the            #
#   backward pass.                                                                #
###################################################################################

x = np.random.randn(2, 3, 4)
theta = np.random.randn(12, 10)
theta_0 = np.random.randn(10)
dout = np.random.randn(2, 10)

if layers.affine_forward(x, theta, theta_0)[0] is not None:
  out, cache = layer_utils.affine_relu_forward(x, theta, theta_0)
  dx, dtheta, dtheta_0 = layer_utils.affine_relu_backward(dout, cache)

  dx_num = eval_numerical_gradient_array(lambda x: layer_utils.affine_relu_forward(x, theta, theta_0)[0], x, dout)
  dtheta_num = eval_numerical_gradient_array(lambda w: layer_utils.affine_relu_forward(x, w, theta_0)[0], theta, dout)
  dtheta_0_num = eval_numerical_gradient_array(lambda b: layer_utils.affine_relu_forward(x, theta, b)[0], theta_0, dout)

  print('Testing affine_relu_forward:')
  print('dx error: ', rel_error(dx_num, dx))
  print('dtheta error: ', rel_error(dtheta_num, dtheta))
  print('dtheta_0 error: ', rel_error(dtheta_0_num, dtheta_0))


###################################################################################
#   Loss layers: Softmax and SVM                                                  #
###################################################################################
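# A sketch of the corresponding numeric check for the two loss layers, assuming
# `softmax_loss` / `svm_loss` follow the (loss, dx) interface used in the
# examples above and that `eval_numerical_gradient` and `rel_error` are
# available like the other checking utilities on this page:
np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)
print('svm_loss: loss =', loss, 'dx error =', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)
print('softmax_loss: loss =', loss, 'dx error =', rel_error(dx_num, dx))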
Example #6
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        # forward pass 1st hidden layer
        out, h1_cache = affine_relu_forward(
            X, self.params['W1'], self.params['b1'])
        # forward pass 2nd affine
        scores, h2_cache = affine_forward(
            out, self.params['W2'], self.params['b2'])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dsoft = softmax_loss(scores, y)
        regularized_term = 0.5 * self.reg * (
            np.sum(self.params['W1'] * self.params['W1']) +
            np.sum(self.params['W2'] * self.params['W2']))
        # regularized loss
        loss += regularized_term
        # backward pass to 2nd affine layer
        dx2, dW2, db2 = affine_backward(dsoft, h2_cache)
        # backward pass to 1st affine_relu_layer
        _, dW1, db1 = affine_relu_backward(dx2, h1_cache)
        # number of examples
        N = X.shape[0]
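        # scale the raw gradients by 1/N and add the L2 regularization
        # gradient (self.reg * W) to the weight matrices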
        grads['W2'] = dW2 / N + self.reg * self.params['W2']
        grads['b2'] = db2 / N
        grads['W1'] = dW1 / N + self.reg * self.params['W1']
        grads['b1'] = db1 / N
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        return loss, grads
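# A numeric check of the two-layer model above (a sketch only): `TwoLayerNet`,
# `eval_numerical_gradient`, and `rel_error` are assumed to be available as in
# the standard assignment code.
np.random.seed(231)
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

model = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C,
                    weight_scale=1e-3, reg=0.3)
loss, grads = model.loss(X, y)
for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))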
Example #7
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        # class scores for the minibatch, shape (N, C)
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        forward_cache = []
        layer_input = X
        # each round is an affine_relu_forward through a hidden layer
        for layer in range(self.num_layers - 1):
            weight_key = 'W' + str(layer + 1)
            bias_key = 'b' + str(layer + 1)
            out, cache = affine_relu_forward(
                layer_input, self.params[weight_key], self.params[bias_key])
            forward_cache.append(cache)
            layer_input = out
        # output layer: affine_forward only (no ReLU)
        weight_key = 'W' + str(self.num_layers)
        bias_key = 'b' + str(self.num_layers)
        scores, cache = affine_forward(
            layer_input, self.params[weight_key], self.params[bias_key])
        forward_cache.append(cache)
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dsoft = softmax_loss(scores, y)
        regularization_term = 0
        for L in range(self.num_layers):
            key = 'W' + str(L+1)
            regularization_term += np.sum(self.params[key] * self.params[key])
        loss += 0.5 * self.reg * regularization_term
        # N number of examples
        N = X.shape[0]
        dupstream = dsoft
        key_i = self.num_layers
        forward_cache.reverse()
        for index, cache in enumerate(forward_cache):
            # the last layer just perform affine backward
            if index == 0:
                dupstream, dw, db = affine_backward(dupstream, cache)
                grad_key = 'W' + str(key_i)
                grads[grad_key] = dw / N + self.reg * self.params[grad_key]
                grad_key = 'b' + str(key_i)
                grads[grad_key] = db / N
                key_i -= 1
            # hidden layer backpropagation, affine_relu_backward
            else:
                dupstream, dw, db = affine_relu_backward(dupstream, cache)
                grad_key = 'W' + str(key_i)
                grads[grad_key] = dw / N + self.reg * self.params[grad_key]
                grad_key = 'b' + str(key_i)
                grads[grad_key] = db / N
                key_i -= 1
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
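# The comment block in the forward pass above mentions passing
# self.dropout_param to each dropout forward call, although this example never
# uses dropout. For reference, a minimal sketch of the inverted-dropout pair
# such calls rely on; assumptions: `p` is the keep probability, the usual
# (out, cache) / dx interface is used, and the real assignment code may differ.
def dropout_forward(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p   # inverted dropout scaling
        out = x * mask
    else:
        mask, out = None, x                         # test time: identity
    return out, (dropout_param, mask)

def dropout_backward(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout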
Example #8
#######################################################################################
# Test the sandwich layers
#    - If you implemented the affine and relu functions properly,
#      this code should pass the gradient check below
#######################################################################################
from layer_utils import affine_relu_forward
from layer_utils import affine_relu_backward

np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(
    lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))
#######################################################################################
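# For reference, the convenience pair being tested simply composes the two
# primitive layers. A minimal sketch of what layer_utils.affine_relu_forward /
# affine_relu_backward typically look like, assuming the standard
# affine_forward / relu_forward primitives and their (out, cache) interface:
def affine_relu_forward(x, w, b):
    # affine layer followed by a ReLU; keep both caches for the backward pass
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)

def affine_relu_backward(dout, cache):
    # backprop through the ReLU, then through the affine layer
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    return affine_backward(da, fc_cache)   # dx, dw, db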
Example #9
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        cache = self.num_layers * [None]
        dropout_cache = (self.num_layers - 1) * [None]
        for i in np.arange(self.num_layers - 1):
            if not self.use_batchnorm:
                scores, cache[i] = affine_relu_forward(
                    X if i == 0 else scores, self.params['W%d' % (i + 1)],
                    self.params['b%d' % (i + 1)])
            else:
                scores, cache[i] = affine_bn_relu_forward(
                    X if i == 0 else scores, self.params['W%d' % (i + 1)],
                    self.params['b%d' % (i + 1)],
                    self.params['gamma%d' % (i + 1)],
                    self.params['beta%d' % (i + 1)], self.bn_params[i])
            if self.use_dropout:
                scores, dropout_cache[i] = dropout_forward(
                    scores, self.dropout_param)

        scores, cache[self.num_layers - 1] = affine_forward(
            scores, self.params['W%d' % self.num_layers],
            self.params['b%d' % self.num_layers])
        ############################################################################
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        loss, dscore = softmax_loss(scores, y)
        dx, dw, db = affine_backward(dscore, cache[self.num_layers - 1])
        grads['W%d' % self.num_layers] = dw
        grads['b%d' % self.num_layers] = db

        for i in reversed(np.arange(self.num_layers - 1)):
            if self.use_dropout:
                dx = dropout_backward(dx, dropout_cache[i])
            if not self.use_batchnorm:
                dx, dw, db = affine_relu_backward(dx, cache[i])
            else:
                dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dx, cache[i])
                grads['gamma%d' % (i + 1)] = dgamma
                grads['beta%d' % (i + 1)] = dbeta
            grads['W%d' % (i + 1)] = dw
            grads['b%d' % (i + 1)] = db

        for i in np.arange(self.num_layers):
            loss += .5 * self.reg * np.sum(
                np.square(self.params['W%d' % (i + 1)]))
            grads['W%d' % (i + 1)] += self.reg * self.params['W%d' % (i + 1)]
        ############################################################################
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        ############################################################################

        return loss, grads
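# `affine_bn_relu_forward` / `affine_bn_relu_backward` used above are not part
# of the stock layer_utils; a plausible sketch, assuming the standard
# batchnorm_forward / batchnorm_backward layers and their interfaces, is:
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    # affine -> batch norm -> ReLU, caching every stage for the backward pass
    a, fc_cache = affine_forward(x, w, b)
    bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn)
    return out, (fc_cache, bn_cache, relu_cache)

def affine_bn_relu_backward(dout, cache):
    fc_cache, bn_cache, relu_cache = cache
    dbn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(dbn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta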
#   There are some common patterns of layers that are frequently used in          #
#   neural nets. For example, affine layers are frequently followed by a          #
#   ReLU nonlinearity. To make these common patterns easy, we define              #
#   several convenience layers in the file layer_utils.py.  For now               #
#   take a look at the affine_relu_forward and affine_relu_backward               #
#   functions, and run the following to numerically gradient check the            #
#   backward pass.                                                                #
##########################################################################

x = np.random.randn(2, 3, 4)
theta = np.random.randn(12, 10)
theta_0 = np.random.randn(10)
dout = np.random.randn(2, 10)

if layers.affine_forward(x, theta, theta_0)[0] is not None:
    out, cache = layer_utils.affine_relu_forward(x, theta, theta_0)
    dx, dtheta, dtheta_0 = layer_utils.affine_relu_backward(dout, cache)

    dx_num = eval_numerical_gradient_array(
        lambda x: layer_utils.affine_relu_forward(x, theta, theta_0)[0], x, dout)
    dtheta_num = eval_numerical_gradient_array(
        lambda w: layer_utils.affine_relu_forward(x, w, theta_0)[0], theta, dout)
    dtheta_0_num = eval_numerical_gradient_array(
        lambda b: layer_utils.affine_relu_forward(x, theta, b)[0], theta_0, dout)

    print('Testing affine_relu_forward:')
    print('dx error: ', rel_error(dx_num, dx))
    print('dtheta error: ', rel_error(dtheta_num, dtheta))
    print('dtheta_0 error: ', rel_error(dtheta_0_num, dtheta_0))