Example #1
  def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.
    """
    W1 = self.params['W1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    scores = None
    # Forward pass: conv -> relu -> max pool -> fc -> relu -> fc
    conv, cache1 = layers.conv_forward(X, W1)
    relu1, cache2 = layers.relu_forward(conv)
    maxp, cache3 = layers.max_pool_forward(relu1, pool_param)
    fc1, cache4 = layers.fc_forward(maxp, W2, b2)
    relu2, cache5 = layers.relu_forward(fc1)
    scores, cache6 = layers.fc_forward(relu2, W3, b3)

    if y is None:
      return scores

    loss, grads = 0, {}
    # Backward pass: traverse the layers in reverse order
    loss, dscores = layers.softmax_loss(scores, y)
    dx3, dW3, db3 = layers.fc_backward(dscores, cache6)
    dRelu2 = layers.relu_backward(dx3, cache5)
    dx2, dW2, db2 = layers.fc_backward(dRelu2, cache4)
    dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape), cache3)
    dRelu1 = layers.relu_backward(dmaxp, cache2)
    dx, dW1 = layers.conv_backward(dRelu1, cache1)

    grads = {'W1': dW1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

    return loss, grads
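These snippets all call into a small NumPy layers module that is not shown here. As a reference, below is a minimal sketch of the cache-based fc_forward / fc_backward pair that this and several later examples appear to assume; the cache is taken to be just the layer inputs, and the real module may differ in detail.

import numpy as np

def fc_forward(x, w, b):
    # x: (N, d_in) or higher-dimensional, w: (d_in, d_out), b: (d_out,) -> out: (N, d_out)
    out = x.reshape(x.shape[0], -1).dot(w) + b
    cache = (x, w, b)
    return out, cache

def fc_backward(dout, cache):
    # dout: (N, d_out); returns gradients w.r.t. the input, weights and bias
    x, w, b = cache
    dx = dout.dot(w.T).reshape(x.shape)
    dw = x.reshape(x.shape[0], -1).T.dot(dout)
    db = dout.sum(axis=0)
    return dx, dw, db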
Example #2
 def backward(self, train_data, y_true):
     # Cross-entropy gradient at the output layer
     loss, self.gradients["y"] = cross_entropy_loss(self.nurons["y"], y_true)
     # Each fc_backward call returns (dW, db, d_input) for one fully connected layer
     self.gradients["W3"], self.gradients["b3"], self.gradients["z3_relu"] = fc_backward(self.gradients["y"],
                                                                                         self.weights["W3"],
                                                                                         self.nurons["z3_relu"])
     self.gradients["z3"] = relu_backward(self.gradients["z3_relu"], self.nurons["z3"])
     self.gradients["W2"], self.gradients["b2"], self.gradients["z2_relu"] = fc_backward(self.gradients["z3"],
                                                                                         self.weights["W2"],
                                                                                         self.nurons["z2_relu"])
     self.gradients["z2"] = relu_backward(self.gradients["z2_relu"], self.nurons["z2"])
     self.gradients["W1"], self.gradients["b1"], _ = fc_backward(self.gradients["z2"],
                                                                 self.weights["W1"],
                                                                 train_data)
     return loss
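Note that this project passes the weight matrix and the layer input to fc_backward explicitly instead of a cache, and the function returns (dW, db, d_input). A minimal sketch under that assumed signature follows; some implementations additionally average the weight and bias gradients over the batch.

import numpy as np

def fc_backward(next_dz, W, z):
    # next_dz: upstream gradient (N, out), W: (in, out), z: layer input (N, in)
    dw = np.dot(z.T, next_dz)      # gradient w.r.t. the weights
    db = np.sum(next_dz, axis=0)   # gradient w.r.t. the bias
    dz = np.dot(next_dz, W.T)      # gradient handed to the previous layer
    return dw, db, dz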
Example #3
    def train(self):
        # Randomly initialize the parameters
        W1 = np.random.randn(2, 3)
        b1 = np.zeros([3])
        loss = 100.0
        lr = 0.01
        i = 0

        while loss > 1e-15:
            x, y_true = self.next_sample(2)  # fetch the current sample batch
            # Forward pass
            y = fc_forward(x, W1, b1)
            # Backward pass: compute loss and gradients
            loss, dy = mean_squared_loss(y, y_true)
            dw, db, _ = fc_backward(dy, W1, x)

            # Gradients are averaged within a batch
            # print(dw)

            # Parameter update (gradient descent step)
            W1 -= lr * dw
            b1 -= lr * db

            # Update the iteration counter
            i += 1
            if i % 1000 == 0:
                print("\nIteration {}: loss {}, weights {}, bias {}".format(i, loss, W1, b1))

        # Print the final result
        print("\nIteration {}: loss {}, weights {}, bias {}".format(i, loss, W1, b1))

        return W1, b1
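The loop above treats mean_squared_loss as returning both the scalar loss and its gradient with respect to the predictions. A minimal sketch consistent with that usage (an assumption, not the project's actual source):

import numpy as np

def mean_squared_loss(y_predict, y_true):
    # Mean over the batch of the summed squared error, plus d(loss)/d(y_predict)
    diff = y_predict - y_true
    loss = np.mean(np.sum(np.square(diff), axis=-1))
    dy = 2.0 * diff / y_predict.shape[0]
    return loss, dy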
Example #4
    def test_fc_backward(self):
        # FC layer: backward
        np.random.seed(498)
        x = np.random.randn(10, 6)
        w = np.random.randn(6, 5)
        b = np.random.randn(5)
        dout = np.random.randn(10, 5)

        dx_num = eval_numerical_gradient_array(
            lambda x: layers.fc_forward(x, w, b)[0], x, dout)
        dw_num = eval_numerical_gradient_array(
            lambda w: layers.fc_forward(x, w, b)[0], w, dout)
        db_num = eval_numerical_gradient_array(
            lambda b: layers.fc_forward(x, w, b)[0], b, dout)

        _, cache = layers.fc_forward(x, w, b)
        dx, dw, db = layers.fc_backward(dout, cache)

        # The error should be around 1e-9
        print('\nTesting fc_backward function:')
        print('dx error: ', rel_error(dx_num, dx))
        print('dw error: ', rel_error(dw_num, dw))
        print('db error: ', rel_error(db_num, db))

        np.testing.assert_allclose(dx, dx_num, atol=1e-8)
        np.testing.assert_allclose(dw, dw_num, atol=1e-8)
        np.testing.assert_allclose(db, db_num, atol=1e-8)
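The test compares the analytic gradients from fc_backward against numeric gradients from eval_numerical_gradient_array using a relative-error helper. rel_error is not shown here; a common definition, given as an assumption, is:

import numpy as np

def rel_error(x, y):
    # Maximum relative error, guarded against division by zero
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))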
Example #5
 def backward(self, in_gradient):
     """
     梯度反向传播
     :param in_gradient: 后一层传递过来的梯度,[B,out_units]
     :return out_gradient: 传递给前一层的梯度,[B,in_units]
     """
     g_weight, g_bias, out_gradient = fc_backward(in_gradient, self.weight,
                                                  self.in_features)
     self.set_gradient('weight', g_weight)
     self.set_gradient('bias', g_bias)
     return out_gradient
Example #6
 def backward(self, grad_scores, cache):
     grads = None
     #######################################################################
     # TODO: Implement the backward pass to compute gradients for all      #
     # learnable parameters of the model, storing them in the grads dict   #
     # above. The grads dict should give gradients for all parameters in   #
     # the dict returned by model.parameters().                            #
     #######################################################################
     cache11, cache12, cache2 = cache
     grad_out12, grad_W2, grad_b2 = fc_backward(grad_scores, cache2)
     grad_out11 = relu_backward(grad_out12, cache12)
     grad_X, grad_W1, grad_b1 = fc_backward(grad_out11, cache11)
     grads = {
         'W1': grad_W1,
         'b1': grad_b1,
         'W2': grad_W2,
         'b2': grad_b2,
     }
     #######################################################################
     #                          END OF YOUR CODE                           #
     #######################################################################
     return grads
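The backward pass unpacks a three-element cache, so the matching forward pass presumably builds it in the same order. A minimal sketch of such a forward method, assuming fc_forward and relu_forward return (out, cache) pairs and the parameters live in self.params (both assumptions):

def forward(self, X):
    # hypothetical counterpart: fc -> relu -> fc, caching each intermediate
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    out11, cache11 = fc_forward(X, W1, b1)
    out12, cache12 = relu_forward(out11)
    scores, cache2 = fc_forward(out12, W2, b2)
    cache = (cache11, cache12, cache2)
    return scores, cache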
Example #7
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Inputs:
        - X: Array of input data of shape (N, d_in)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W3, b3 = self.params['W3'], self.params['b3']
        N, d_in = X.shape

        scores = None
        f, cache1 = layers.fc_forward(X, W1, b1)       # fc
        h, cache2 = layers.relu_forward(f)             # relu
        scores, cache3 = layers.fc_forward(h, W3, b3)  # fc

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss, dscores = layers.softmax_loss(scores, y)
        dx2, dW3, db3 = layers.fc_backward(dscores, cache3)
        dx1 = layers.relu_backward(dx2, cache2)
        dx, dW1, db1 = layers.fc_backward(dx1, cache1)

        grads = {'W1': dW1, 'b1': db1, 'W3': dW3, 'b3': db3}

        return loss, grads
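layers.softmax_loss is expected to return the scalar cross-entropy loss and its gradient with respect to the scores. A standard, numerically stable sketch (an assumption about the module, not its actual source):

import numpy as np

def softmax_loss(scores, y):
    # scores: (N, C), y: (N,) integer labels
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
    N = scores.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N
    return loss, dscores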
Example #8
    def loss(self, features, captions):
        """
        Compute training-time loss for the RNN. We input image features and
        ground-truth captions for those images, and use an RNN to compute
        loss and gradients on all parameters.
        Inputs:
        - features: Input image features, of shape (N, D)
        - captions: Ground-truth captions; an integer array of shape (N, T) where
          each element is in the range 0 <= y[i, t] < V
        Returns a tuple of:
        - loss: Scalar loss
        - grads: Dictionary of gradients parallel to self.params
        """
        # Cut captions into two pieces: captions_in has everything but the last word
        # and will be input to the RNN; captions_out has everything but the first
        # word and this is what we will expect the RNN to generate. These are offset
        # by one relative to each other because the RNN should produce word (t+1)
        # after receiving word t. The first element of captions_in will be the START
        # token, and the first element of captions_out will be the first word.
        captions_in = captions[:, :-1]
        captions_out = captions[:, 1:]

        # You'll need this
        mask = (captions_out != self._null)

        # Weight and bias for the affine transform from image features to initial
        # hidden state
        W_proj, b_proj = self.params['W_proj'], self.params['b_proj']

        # Word embedding matrix
        W_embed = self.params['W_embed']

        # Input-to-hidden, hidden-to-hidden, and biases for the RNN
        Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']

        # Weight and bias for the hidden-to-vocab transformation.
        W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the forward and backward passes for the CaptioningRNN.   #
        # In the forward pass you will need to do the following:                   #
        # (1) Use an fc transformation to compute the initial hidden state         #
        #     from the image features. This should produce an array of shape (N, H)#
        # (2) Use a word embedding layer to transform the words in captions_in     #
        #     from indices to vectors, giving an array of shape (N, T, W).         #
        # (3) Use a vanilla RNN to process the sequence of input word vectors      #
        #     of shape (T, N, W), and produce hidden state vectors for all         #
        #     timesteps, producing an array of shape (T, N, H).                    #
        # (4) Use a (temporal) fc transformation to compute scores over the        #
        #     vocabulary at every timestep using the hidden states, giving an      #
        #     array of shape (N, T, V).                                            #
        # (5) Use (temporal) softmax to compute loss using captions_out, ignoring  #
        #     the points where the output word is <NULL> using the mask above.     #
        #                                                                          #
        # In the backward pass you will need to compute the gradient of the loss   #
        # with respect to all model parameters. Use the loss and grads variables   #
        # defined above to store loss and gradients; grads[k] should give the      #
        # gradients for self.params[k].                                            #
        ############################################################################

        # Forward pass
        # step 1: image features -> initial hidden state (N, H)
        h0, cache_0 = layers.fc_forward(features, W_proj, b_proj)

        # step 2: word indices -> word vectors (N, T, W)
        word_embedded, word_embedded_cache = word_embedding_forward(captions_in, W_embed)
        word_embedded = np.transpose(word_embedded, (1, 0, 2))

        # step 3: vanilla RNN over all timesteps
        h, cache_rnn = rnn_forward(word_embedded, h0, Wx, Wh, b)
        h = np.transpose(h, (1, 0, 2))

        # step 4: hidden states -> vocabulary scores (N, T, V)
        y_hat, cache_temp = temporal_fc_forward(h, W_vocab, b_vocab)

        # step 5: temporal softmax loss, masking <NULL> targets
        loss, dout = temporal_softmax_loss(y_hat, captions_out, mask)

        # Gradients
        # temporal fc backward
        dh, grads['W_vocab'], grads['b_vocab'] = temporal_fc_backward(dout, cache_temp)
        dh = np.transpose(dh, (1, 0, 2))

        # rnn backward
        d_word, dh0, grads['Wx'], grads['Wh'], grads['b'] = rnn_backward(dh, cache_rnn)
        d_word = np.transpose(d_word, (1, 0, 2))

        # word embedding backward
        grads['W_embed'] = word_embedding_backward(d_word, word_embedded_cache)

        # fully connected backward
        d_feature, grads['W_proj'], grads['b_proj'] = layers.fc_backward(dh0, cache_0)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
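Step 5 relies on temporal_softmax_loss, which applies softmax cross-entropy at every timestep and zeroes out positions where the target word is <NULL>. A sketch of the usual formulation, included only as a reference assumption about what these examples expect:

import numpy as np

def temporal_softmax_loss(x, y, mask):
    # x: (N, T, V) scores, y: (N, T) integer targets, mask: (N, T) booleans
    N, T, V = x.shape
    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)

    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N

    dx_flat = probs.copy()
    dx_flat[np.arange(N * T), y_flat] -= 1
    dx_flat /= N
    dx_flat *= mask_flat[:, None]
    return loss, dx_flat.reshape(N, T, V)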
Example #9
    def loss(self, features, captions):
        """
        Compute training-time loss for the RNN. We input image features and
        ground-truth captions for those images, and use an RNN to compute
        loss and gradients on all parameters.
        Inputs:
        - features: Input image features, of shape (N, D)
        - captions: Ground-truth captions; an integer array of shape (N, T) where
          each element is in the range 0 <= y[i, t] < V
        Returns a tuple of:
        - loss: Scalar loss
        - grads: Dictionary of gradients parallel to self.params
        """
        # Cut captions into two pieces: captions_in has everything but the last word
        # and will be input to the RNN; captions_out has everything but the first
        # word and this is what we will expect the RNN to generate. These are offset
        # by one relative to each other because the RNN should produce word (t+1)
        # after receiving word t. The first element of captions_in will be the START
        # token, and the first element of captions_out will be the first word.
        captions_in = captions[:, :-1]
        captions_out = captions[:, 1:]

        # You'll need this
        mask = (captions_out != self._null)

        # Weight and bias for the affine transform from image features to initial
        # hidden state
        W_proj, b_proj = self.params['W_proj'], self.params['b_proj']

        # Word embedding matrix
        W_embed = self.params['W_embed']

        # Input-to-hidden, hidden-to-hidden, and biases for the RNN
        Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']

        # Weight and bias for the hidden-to-vocab transformation.
        W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

        loss, grads = 0.0, {}

        # Forward Pass
        fc, cache1 = layers.fc_forward(features, W_proj, b_proj)
        emb, cache2 = rnn_layers.word_embedding_forward(captions_in, W_embed)
        emb = emb.transpose(1, 0, 2)
        rnn, cache3 = rnn_layers.rnn_forward(emb, fc, Wx, Wh, b)
        rnn = rnn.transpose(1, 0, 2)
        tfc, cache4 = rnn_layers.temporal_fc_forward(rnn, W_vocab, b_vocab)
        loss, dout = rnn_layers.temporal_softmax_loss(tfc, captions_out, mask)

        # Gradients
        dtfc, dW_vocab, db_vocab = rnn_layers.temporal_fc_backward(
            dout, cache4)
        dtfc = dtfc.transpose(1, 0, 2)
        drnn, dfc, dWx, dWh, db = rnn_layers.rnn_backward(dtfc, cache3)
        drnn = drnn.transpose(1, 0, 2)
        dW_embed = rnn_layers.word_embedding_backward(drnn, cache2)
        dfeature, dW_proj, db_proj = layers.fc_backward(dfc, cache1)

        grads = {
            'W_embed': dW_embed,
            'W_proj': dW_proj,
            'W_vocab': dW_vocab,
            'Wh': dWh,
            'Wx': dWx,
            'b': db,
            'b_proj': db_proj,
            'b_vocab': db_vocab
        }
        return loss, grads
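temporal_fc_forward and temporal_fc_backward apply a single affine map independently at every timestep. A minimal sketch of that pair, assuming the cache simply stores the inputs:

import numpy as np

def temporal_fc_forward(x, w, b):
    # x: (N, T, D), w: (D, M), b: (M,) -> out: (N, T, M)
    N, T, D = x.shape
    M = b.shape[0]
    out = x.reshape(N * T, D).dot(w).reshape(N, T, M) + b
    cache = (x, w, b)
    return out, cache

def temporal_fc_backward(dout, cache):
    # dout: (N, T, M); returns gradients w.r.t. the input, weights and bias
    x, w, b = cache
    N, T, D = x.shape
    M = b.shape[0]
    dout = dout.reshape(N * T, M)
    dx = dout.dot(w.T).reshape(N, T, D)
    dw = x.reshape(N * T, D).T.dot(dout)
    db = dout.sum(axis=0)
    return dx, dw, db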
Example #10
    def loss(self, features, captions):
        """
        Compute training-time loss for the RNN. We input image features and
        ground-truth captions for those images, and use an RNN to compute
        loss and gradients on all parameters.
        Inputs:
        - features: Input image features, of shape (N, D)
        - captions: Ground-truth captions; an integer array of shape (N, T) where
          each element is in the range 0 <= y[i, t] < V
        Returns a tuple of:
        - loss: Scalar loss
        - grads: Dictionary of gradients parallel to self.params
        """
        # Cut captions into two pieces: captions_in has everything but the last word
        # and will be input to the RNN; captions_out has everything but the first
        # word and this is what we will expect the RNN to generate. These are offset
        # by one relative to each other because the RNN should produce word (t+1)
        # after receiving word t. The first element of captions_in will be the START
        # token, and the first element of captions_out will be the first word.
        captions_in = captions[:, :-1]
        captions_out = captions[:, 1:]

        # You'll need this
        mask = (captions_out != self._null)

        # Weight and bias for the affine transform from image features to initial
        # hidden state
        W_proj, b_proj = self.params['W_proj'], self.params['b_proj']

        # Word embedding matrix
        W_embed = self.params['W_embed']

        # Input-to-hidden, hidden-to-hidden, and biases for the RNN
        Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']

        # Weight and bias for the hidden-to-vocab transformation.
        W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

        loss, grads = 0.0, {}
        batch_size, input_dim = features.shape
        _, n_time_steps = captions_in.shape
        wordvec_dim = Wx.shape[0]
        hidden_dim = Wh.shape[0]
        vocab_size = W_vocab.shape[1]
        ############################################################################
        # TODO: Implement the forward and backward passes for the CaptioningRNN.   #
        # In the forward pass you will need to do the following:                   #
        # (1) Use an fc transformation to compute the initial hidden state         #
        #     from the image features. This should produce an array of shape (N, H)#
        # (2) Use a word embedding layer to transform the words in captions_in     #
        #     from indices to vectors, giving an array of shape (N, T, W).         #
        # (3) Use a vanilla RNN to process the sequence of input word vectors      #
        #     and produce hidden state vectors for all timesteps, producing        #
        #     an array of shape (T, N, H).                                         #
        # (4) Use a (temporal) fc transformation to compute scores over the        #
        #     vocabulary at every timestep using the hidden states, giving an      #
        #     array of shape (N, T, V).                                            #
        # (5) Use (temporal) softmax to compute loss using captions_out, ignoring  #
        #     the points where the output word is <NULL> using the mask above.     #
        #                                                                          #
        # In the backward pass you will need to compute the gradient of the loss   #
        # with respect to all model parameters. Use the loss and grads variables   #
        # defined above to store loss and gradients; grads[k] should give the      #
        # gradients for self.params[k].                                            #
        ############################################################################

        # Forward Pass
        # N x T x D
        # (1) compute the initial hidden state (N, H)
        h0, cache_h0 = fc_forward(features, W_proj, b_proj)

        # (2) transform the words in captions_in to vectors (N, T, W)
        x, cache_emb = word_embedding_forward(captions_in, W_embed)
        x_trans = np.transpose(x, (1, 0, 2))
        # (3) produce hidden state vectors for all timesteps (N, T, H)
        h_trans, cache_h = rnn_forward(x_trans, h0, Wx, Wh, b)
        h = np.transpose(h_trans, (1, 0, 2))
        # (4) compute scores over the vocabulary (N, T, V)
        out, cache_out = temporal_fc_forward(h, W_vocab, b_vocab)

        # (5) compute softmax loss using captions_out
        loss, dout = temporal_softmax_loss(out, captions_out, mask)

        # Gradients
        # (6) backprop for (4)
        dout = dout.reshape(-1, vocab_size)  # (N x T, V)
        dh, grads['W_vocab'], grads['b_vocab'] = temporal_fc_backward(
            dout, cache_out)
        dh = np.transpose(dh, (1, 0, 2))
        # (7) backprop for (3)
        dx, dh0, grads['Wx'], grads['Wh'], grads['b'] = rnn_backward(
            dh, cache_h)
        dx = np.transpose(dx, (1, 0, 2))
        # (8) backprop for (2)
        grads['W_embed'] = word_embedding_backward(dx, cache_emb)

        # (9) backprop for (1)
        _, grads['W_proj'], grads['b_proj'] = fc_backward(dh0, cache_h0)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
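The word-embedding layer used in steps (2) and (8) is a table lookup in the forward pass and a scatter-add of gradients in the backward pass. A minimal sketch, stated as an assumption about the assignment's API:

import numpy as np

def word_embedding_forward(x, W):
    # x: integer word indices (N, T), W: embedding matrix (V, D) -> out: (N, T, D)
    out = W[x]
    cache = (x, W)
    return out, cache

def word_embedding_backward(dout, cache):
    # Accumulate upstream gradients onto the embedding rows that were looked up
    x, W = cache
    dW = np.zeros_like(W)
    np.add.at(dW, x, dout)
    return dW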