Example #1
  def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.
    """
    W1 = self.params['W1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    scores = None
    conv, cache1 = layers.conv_forward(X, W1)
    relu1, cache2 = layers.relu_forward(conv)
    maxp, cache3 = layers.max_pool_forward(relu1, pool_param)
    fc1, cache4 = layers.fc_forward(maxp, W2, b2)
    relu2, cache5 = layers.relu_forward(fc1)
    scores, cache6 = layers.fc_forward(relu2, W3, b3)

    if y is None:
      return scores

    loss, grads = 0, {}
    loss, dscores = layers.softmax_loss(scores, y)
    dx3, dW3, db3 = layers.fc_backward(dscores, cache6)
    dRelu2 = layers.relu_backward(dx3, cache5)
    dx2, dW2, db2 = layers.fc_backward(dRelu2, cache4)
    dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape), cache3)
    dRelu1 = layers.relu_backward(dmaxp, cache2)
    dx, dW1 = layers.conv_backward(dRelu1, cache1)

    grads = {'W1': dW1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

    return loss, grads
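Example #1 above (and Example #7 below) calls layers.softmax_loss without showing it. A minimal sketch of a numerically stable softmax loss with its gradient (an assumption about that helper, taking scores of shape (N, C) and integer labels y) could look like this:

import numpy as np

def softmax_loss(scores, y):
    # Sketch only: shift scores for stability, then average cross-entropy over the batch.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    N = scores.shape[0]
    loss = -np.mean(log_probs[np.arange(N), y])
    dscores = np.exp(log_probs)
    dscores[np.arange(N), y] -= 1
    dscores /= N
    return loss, dscores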
Example #2
def affine_relu_backward(dout, cache):
    '''
    backward pass for the affine-relu convenience layer
    '''
    fc_cache, relu_cache = cache
    da = layers.relu_backward(dout, relu_cache)
    dx, dw, db = layers.affine_backward(da, fc_cache)
    return dx, dw, db
def test_relulayer():
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(lambda x: layers.relu_forward(x)[0], x, dout)
    _, cache = layers.relu_forward(x)
    dx = layers.relu_backward(dout, cache)

    # The error should be around 1e-12
    print('Testing relu layers:')
    print('dx error: ', rel_error(dx_num, dx))
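These tests rely on a rel_error helper that is never defined in the snippets. A common definition (assumed here) is:

import numpy as np

def rel_error(x, y):
    # Maximum relative error between two arrays, guarded against division by zero.
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))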
Example #5
    def test_relu_backward(self):
        # ReLU layer: backward
        np.random.seed(498)
        x = np.random.randn(10, 10)
        dout = np.random.randn(*x.shape)

        dx_num = eval_numerical_gradient_array(
            lambda x: layers.relu_forward(x)[0], x, dout)

        _, cache = layers.relu_forward(x)
        dx = layers.relu_backward(dout, cache)

        # The error should be around 3e-12
        print('\nTesting relu_backward function:')
        print('dx error: ', rel_error(dx_num, dx))
        np.testing.assert_allclose(dx, dx_num, atol=1e-9)
Example #6
 def backward(self, grad_scores, cache):
     grads = None
     #######################################################################
     # TODO: Implement the backward pass to compute gradients for all      #
     # learnable parameters of the model, storing them in the grads dict   #
     # above. The grads dict should give gradients for all parameters in   #
     # the dict returned by model.parameters().                            #
     #######################################################################
     cache11, cache12, cache2 = cache
     grad_out12, grad_W2, grad_b2 = fc_backward(grad_scores, cache2)
     grad_out11 = relu_backward(grad_out12, cache12)
     grad_X, grad_W1, grad_b1 = fc_backward(grad_out11, cache11)
     grads = {
         'W1': grad_W1,
         'b1': grad_b1,
         'W2': grad_W2,
         'b2': grad_b2,
     }
     #######################################################################
     #                          END OF YOUR CODE                           #
     #######################################################################
     return grads
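The backward pass above unpacks cache = (cache11, cache12, cache2). A hypothetical forward pass producing that layout (a sketch only, assuming fc_forward and relu_forward counterparts to the fc_backward and relu_backward used above, and parameter keys matching the grads dict) might be:

def forward(self, X):
    # Hypothetical sketch: fc -> relu -> fc, caching each step for the backward pass above.
    out11, cache11 = fc_forward(X, self.params['W1'], self.params['b1'])
    out12, cache12 = relu_forward(out11)
    scores, cache2 = fc_forward(out12, self.params['W2'], self.params['b2'])
    return scores, (cache11, cache12, cache2)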
Example #7
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Inputs:
        - X: Array of input data of shape (N, d_in)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W3, b3 = self.params['W3'], self.params['b3']
        N, d_in = X.shape

        scores = None
        f, cache1 = layers.fc_forward(X, W1, b1)  #fc
        h, cache2 = layers.relu_forward(f)  #relu
        scores, cache3 = layers.fc_forward(h, W3, b3)  #fc

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss, dscores = layers.softmax_loss(scores, y)
        dx2, dW3, db3 = layers.fc_backward(dscores, cache3)
        dx1 = layers.relu_backward(dx2, cache2)
        dx, dW1, db1 = layers.fc_backward(dx1, cache1)

        grads = {'W1': dW1, 'b1': db1, 'W3': dW3, 'b3': db3}

        return loss, grads
def affine_relu_backward(dout, cache):
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
Example #9
File: fcnet.py  Project: jiaxinchenxx/NN
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        VAL = X.copy()

        for i in range(1, self.num_layers):
            linear_cache['L{}'.format(i)] = linear_forward(
                VAL, self.params['W{}'.format(i)],
                self.params['b{}'.format(i)])
            relu_cache['R{}'.format(i)] = relu_forward(
                linear_cache['L{}'.format(i)])
            if self.use_dropout:
                dropout_cache['D{}'.format(i)], dropout_cache['MASK{}'.format(i)] = dropout_forward(
                    relu_cache['R{}'.format(i)], self.dropout_params['p'],
                    self.dropout_params['train'], self.dropout_params['seed'])
                VAL = dropout_cache['D{}'.format(i)]
            else:
                VAL = relu_cache['R{}'.format(i)]


        linear_cache['L{}'.format(self.num_layers)] = linear_forward(
            VAL, self.params['W{}'.format(self.num_layers)],
            self.params['b{}'.format(self.num_layers)])

        scores = linear_cache['L{}'.format(self.num_layers)]

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        loss, grad = softmax(scores, y)

        if self.use_dropout:
            VAR = dropout_cache['D{}'.format(self.num_layers - 1)]
        else:
            VAR = relu_cache['R{}'.format(self.num_layers - 1)]

        dX, grads['W{}'.format(self.num_layers)], grads['b{}'.format(self.num_layers)] = linear_backward(
            grad, VAR, self.params['W{}'.format(self.num_layers)],
            self.params['b{}'.format(self.num_layers)])

        grads['W{}'.format(self.num_layers)] += (
            self.reg * self.params['W{}'.format(self.num_layers)])

        loss += 0.5 * self.reg * np.sum(
            self.params['W' + str(self.num_layers)] ** 2)

        for inx in range(self.num_layers - 1, 0, -1):
            if self.use_dropout:
                dX = dropout_backward(dX, dropout_cache['MASK{}'.format(inx)],
                                      self.dropout_params['p'])

            dX = relu_backward(dX, linear_cache['L' + str(inx)])

            if inx - 1 != 0:
                if self.use_dropout:
                    pre_layer = dropout_cache['D{}'.format(inx - 1)]
                else:
                    pre_layer = relu_cache['R{}'.format(inx - 1)]
                dX, grads['W' + str(inx)], grads['b' + str(inx)] = linear_backward(
                    dX, pre_layer, self.params['W{}'.format(inx)],
                    self.params['b{}'.format(inx)])

                grads['W' + str(inx)] += self.reg * self.params['W' + str(inx)]
                loss += 0.5 * self.reg * np.sum(self.params['W' + str(inx)]**2)

            else:

                dX, grads['W' + str(inx)], grads['b' + str(inx)] = linear_backward(
                    dX, X, self.params['W{}'.format(inx)],
                    self.params['b{}'.format(inx)])
                grads['W' + str(inx)] += self.reg * self.params['W' + str(inx)]
                loss += 0.5 * self.reg * np.sum(self.params['W' + str(inx)]**2)

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
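Example #9 (and Example #12 below) calls linear_backward(dout, layer_input, W, b) instead of passing a packed cache. A sketch consistent with that signature (an assumption about that project's layers module, for a layer computing out = X_flat.dot(W) + b on flattened inputs) is:

import numpy as np

def linear_backward(dout, X, W, b):
    # Sketch: gradients of out = X_flat.dot(W) + b with respect to X, W and b.
    X_flat = X.reshape(X.shape[0], -1)
    dX = dout.dot(W.T).reshape(X.shape)
    dW = X_flat.T.dot(dout)
    db = np.sum(dout, axis=0)
    return dX, dW, db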
#   In the file layers.py implement the backward pass for the ReLU activation in  #
#   the relu_backward function.                                                   #
#   Once you are done you can test your implementation using the numeric gradient #
#   checking.                                                                     #
###################################################################################
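For reference, a minimal relu_backward consistent with the test below (assuming relu_forward caches its input x) could look like:

import numpy as np

def relu_backward(dout, cache):
    # Route gradients through only where the forward-pass input was positive.
    x = cache
    return dout * (x > 0)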

# Test the relu_backward function

x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

if layers.relu_forward(x)[0] is not None:
  dx_num = eval_numerical_gradient_array(lambda x: layers.relu_forward(x)[0], x, dout)

  _, cache = layers.relu_forward(x)
  dx = layers.relu_backward(dout, cache)

  # The error should be around 1e-12
  print('Testing relu_backward function:')
  print('dx error (should be around 1e-12): ', rel_error(dx_num, dx))


###################################################################################
#   Sandwich layers                                                               #
###################################################################################
#   There are some common patterns of layers that are frequently used in          #
#   neural nets. For example, affine layers are frequently followed by a          #
#   ReLU nonlinearity. To make these common patterns easy, we define              #
#   several convenience layers in the file layer_utils.py.  For now               #
#   take a look at the affine_relu_forward and affine_relu_backward               #
#   functions, and run the following to numerically gradient check the            #
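The affine_relu_backward helpers shown earlier pair with an affine_relu_forward. A sketch consistent with them (assuming affine_forward and relu_forward each return an (out, cache) tuple) would be:

def affine_relu_forward(x, w, b):
    # Affine transform followed by a ReLU; the combined cache feeds affine_relu_backward.
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)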
Example #11
    def loss(self,X,y=None):
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        if self.use_dropout :
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None


        inputi = X
        batch_size = X.shape[0]
        X = np.reshape(X,[batch_size,-1])

        fc_cache_list = []
        relu_cache_list = []
        bn_cache_list = []
        dropout_cache_list = []


        for i in range(self.num_layers-1):
            fc_act,fc_cache= affine_forward(X,self.params['W'+str(i+1)],self.params['b'+str(i+1)])
            fc_cache_list.append(fc_cache)
            if self.use_batchnorm:
                bn_act,bn_cache = batchnorm_forward(fc_act,self.params['gamma'+str(i+1)],self.params['beta'+str(i+1)],self.bn_params[i])
                bn_cache_list.append(bn_cache)
                relu_act,relu_cache = relu_forward(bn_act)
                relu_cache_list.append(relu_cache)
            else:
                relu_act,relu_cache = relu_forward(fc_act)
                relu_cache_list.append(relu_cache)
            if self.use_dropout:
                relu_act,dropout_cache = dropout_forward(relu_act,self.dropout_param)
                dropout_cache_list.append(dropout_cache)

            X = relu_act.copy()
        # last layer
        scores, final_cache = affine_forward(
            X, self.params['W' + str(self.num_layers)],
            self.params['b' + str(self.num_layers)])
        #
        # for layer in range(self.num_layers):
        #     Wi,bi = self.params['W%d'%(layer+1)],self.params['b%d'%(layer+1)]
        #     outi,fc_cachei = affine_forward(inputi,Wi,bi)
        #     fc_cache_list.append(fc_cachei)
        #
        #     if self.use_batchnorm and layer!=self.num_layers-1:
        #         gammai,betai = self.params['gamma%d'%(layer+1)],self.params['beta%d'%(layer+1)]
        #
        #         outi,bn_cachei = batchnorm_forward(outi,gammai,betai,self.bn_params[layer])
        #         bn_cache_list.append(bn_cachei)
        #     outi,relu_cachei = relu_forward(outi)
        #     relu_cache_list.append(relu_cachei)
        #
        #     if self.use_dropout:
        #         outi,dropout_cachei = dropout_forward(outi,self.dropout_param)
        #         dropout_cache_list.append(dropout_cachei)
        #
        #     inputi = outi
        #
        # scores = outi

        if mode == 'test':
            return scores

        loss,grads = 0.0,{}

        loss,dsoft = softmax_loss(scores,y)
        loss += 0.5*self.reg*(np.sum(np.square(self.params['W'+str(self.num_layers)])))
        # backward pass through the last layer
        dx_last, dw_last, db_last = affine_backward(dsoft, final_cache)
        grads['W' + str(self.num_layers)] = dw_last + self.reg * self.params['W' + str(self.num_layers)]
        grads['b'+str(self.num_layers)] = db_last

        for i in range(self.num_layers-1,0,-1):

            if self.use_dropout:
                dx_last = dropout_backward(dx_last,dropout_cache_list[i-1])

            drelu = relu_backward(dx_last,relu_cache_list[i-1])
            if self.use_batchnorm:
                dbatchnorm,dgamma,dbeta = batchnorm_backward(drelu,bn_cache_list[i-1])
                dx_last,dw_last,db_last = affine_backward(dbatchnorm,fc_cache_list[i-1])
                grads['beta'+str(i)] = dbeta
                grads['gamma'+str(i)] = dgamma
            else:
                dx_last,dw_last,db_last = affine_backward(drelu,fc_cache_list[i-1])

            grads['W'+str(i)] = dw_last+self.reg*self.params['W'+str(i)]
            grads['b'+str(i)] = db_last

            loss += 0.5*self.reg*(np.sum(np.square(self.params['W'+str(i)])))

        return loss,grads
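Example #11 assumes dropout_forward and dropout_backward that take a dropout_param dict with 'p' and 'mode' keys. A minimal inverted-dropout sketch in that style (an assumption, treating p as the keep probability) is:

import numpy as np

def dropout_forward(x, dropout_param):
    # Inverted dropout: scale kept units at train time so test time is a no-op.
    p, mode = dropout_param['p'], dropout_param['mode']
    mask, out = None, x
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask
    return out, (dropout_param, mask)

def dropout_backward(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout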
Example #12
print('Testing relu_forward function:')
print('difference: ', rel_error(out, correct_out))
#######################################################################################

#######################################################################################
# Test the relu_backward function
#######################################################################################
from layers import relu_backward

x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))
#######################################################################################

#######################################################################################
# Test the sandwich layers
#    -If you implemented affine and relu functions properly,
#     you would have no problem with this code
#######################################################################################
from layer_utils import affine_relu_forward
from layer_utils import affine_relu_backward

np.random.seed(231)
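The original gradient check is cut off after the seed. A typical numeric check for the sandwich layer (a sketch only, reusing the helpers imported above) would continue along these lines:

x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

# All three errors should be small (roughly 1e-10 or below)
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))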
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        # input -> first hidden layer
        linear_cache[1] = linear_forward(X, self.params["W1"],
                                         self.params["b1"])
        input_next = relu_forward(linear_cache[1])
        relu_cache[1] = input_next.copy()

        # hidden layer i -> hidden layer i+1
        for l in range(2, self.num_layers):
            if self.use_dropout:
                input_next, dropout_cache[l - 1] = self.apply_forward_dropout(
                    input_next)

            linear_cache[l] = linear_forward(input_next,
                                             self.params["W%d" % l],
                                             self.params["b%d" % l])
            input_next = relu_forward(linear_cache[l])
            relu_cache[l] = input_next.copy()

        # last hidden layer -> output layer
        if self.use_dropout:
            input_next, dropout_cache[
                self.num_layers - 1] = self.apply_forward_dropout(input_next)

        linear_cache[self.num_layers] = linear_forward(
            input_next, self.params["W%d" % self.num_layers],
            self.params["b%d" % self.num_layers])
        scores = linear_cache[self.num_layers].copy()
        # scores = scores / np.abs(scores).sum(axis=1)[:, None]
        #print(scores)
        # print(scores.shape)
        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################
        loss, dlogits = softmax(scores, y)
        # add L2 regularization
        loss += 0.5 * self.reg * np.sum([
            np.sum(self.params["W%d" % l]**2)
            for l in range(1, self.num_layers + 1)
        ])

        # last hidden layer <- output layer
        dX_, dW_, db_ = linear_backward(dlogits,
                                        relu_cache[self.num_layers - 1],
                                        self.params["W%d" % self.num_layers],
                                        self.params["b%d" % self.num_layers])
        # add regularization effect to W
        grads["W%d" % self.num_layers] = (
            dW_ + self.reg * self.params["W%d" % self.num_layers])
        grads["b%d" % self.num_layers] = db_.copy()

        # hidden layer i <- hidden layer i+1
        for l in reversed(range(2, self.num_layers)):
            if self.use_dropout:
                dX_ = self.apply_backward_dropout(dX_, dropout_cache[l])
            dX_ = relu_backward(dX_, linear_cache[l])
            dX_, dW_, db_ = linear_backward(dX_, relu_cache[l - 1],
                                            self.params["W%d" % l],
                                            self.params["b%d" % l])
            # add regularization effect to W
            grads["W%d" % l] = dW_ + self.reg * self.params["W%d" % l]
            grads["b%d" % l] = db_
        # input layer <- first hidden layer
        if self.use_dropout:
            dX_ = self.apply_backward_dropout(dX_, dropout_cache[1])
        dX_ = relu_backward(dX_, linear_cache[1])
        dX_, dW_, db_ = linear_backward(dX_, X, self.params["W1"],
                                        self.params["b1"])
        # add regularization effect to W
        grads["W1"] = dW_ + self.reg * self.params["W1"]
        grads["b1"] = db_
        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads