Example 1
    def loss(self, X, y=None, reg=1e-5):
        print('start computing loss and grad.............')
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}  # integer padding

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # compute the forward pass
        print('compute the forward pass......')
        print('compute the w1 conv_relu_pool_forward forward pass......')
        a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                                   pool_param)

        print('compute the w2 affine_relu_forward forward pass......')
        a2, cache2 = layers.affine_relu_forward(a1, W2, b2)

        print('compute the w3 affine_forward forward pass......')
        scores, cache3 = layers.affine_forward(a2, W3, b3)

        if y is None:
            return scores

        # compute the backward pass
        print('compute the backward pass......')
        print('compute the softmax_loss backward pass......')
        data_loss, dscores = layers.softmax_loss(scores, y)

        print('compute the dw3 affine_backward backward pass......')
        da2, dW3, db3 = layers.affine_backward(dscores, cache3)

        print('compute the dw2 affine_relu_backward backward pass......')
        da1, dW2, db2 = layers.affine_relu_backward(da2, cache2)

        print('compute the dw1 conv_relu_pool_backward backward pass......')
        dX, dW1, db1 = layers.conv_relu_pool_backward(da1, cache1)

        # Add regularization (note: the strength comes from self.reg; the reg argument above is unused)
        dW1 += self.reg * W1
        dW2 += self.reg * W2
        dW3 += self.reg * W3
        reg_loss = 0.5 * self.reg * sum(np.sum(W * W) for W in [W1, W2, W3])
        loss = data_loss + reg_loss
        grads = {
            'W1': dW1,
            'b1': db1,
            'W2': dW2,
            'b2': db2,
            'W3': dW3,
            'b3': db3
        }
        print('computing loss and grad end !!!!!!!!!!!!!!!!!')
        print('loss is :', loss)
        return loss, grads
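The (loss, grads) pair returned above is what a training loop consumes. Below is a minimal sketch of one vanilla SGD update, assuming a model object that exposes this loss method and a params dict; the model, X_batch, y_batch and learning_rate names are assumptions for illustration, not part of the snippet.

learning_rate = 1e-3
loss, grads = model.loss(X_batch, y_batch)         # forward + backward pass
for name, dparam in grads.items():
    model.params[name] -= learning_rate * dparam   # vanilla SGD step on each parameter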
Example 2
    def loss(self, X, y=None, reg=1e-5):

        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # compute the forward pass
        a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                                   pool_param)
        norm_out, norm_cache = layers.spatial_batchnorm_forward(
            a1, 1, 0, bn_param={'mode': 'train'})

        a2, cache2 = layers.affine_relu_forward(norm_out, W2, b2)
        scores, cache3 = layers.affine_forward(a2, W3, b3)

        if y is None:
            return scores

        # compute the backward pass
        data_loss = NUS_loss_test.NUSDataTrain().loss(scores, y)
        dscores = NUS_loss_test.NUSDataTrain().eval_numerical_gradient(
            NUS_loss_test.NUSDataTrain().grad_loss,
            scores)  # layers.softmax_loss(scores, y)  # change this
        da2, dW3, db3 = layers.affine_backward(dscores, cache3)
        da1, dW2, db2 = layers.affine_relu_backward(da2, cache2)
        # gamma/beta were passed as the constants 1 and 0 in the forward pass above,
        # so dgamma/dbeta are computed here but never added to grads
        dnorm_out, dgamma, dbeta = layers.spatial_batchnorm_backward(
            da1, norm_cache)
        dX, dW1, db1 = layers.conv_relu_pool_backward(dnorm_out, cache1)

        # Add regularization
        dW1 += self.reg * W1
        dW2 += self.reg * W2
        dW3 += self.reg * W3
        reg_loss = 0.5 * self.reg * sum(np.sum(W * W) for W in [W1, W2, W3])
        loss = data_loss + reg_loss
        grads = {
            'W1': dW1,
            'b1': db1,
            'W2': dW2,
            'b2': db2,
            'W3': dW3,
            'b3': db3
        }

        return loss, grads
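Example 2 estimates dscores numerically through an external helper (NUS_loss_test) that is not shown here. The general idea is a centred-difference gradient; the following is a generic, self-contained sketch of such a helper, illustrative only and not the NUS_loss_test implementation.

import numpy as np

def numerical_gradient(f, x, h=1e-5):
    # centred-difference estimate of df/dx for a scalar-valued function f
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        old = x[idx]
        x[idx] = old + h
        f_plus = f(x)
        x[idx] = old - h
        f_minus = f(x)
        x[idx] = old                              # restore the original value
        grad[idx] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad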
Example 3
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # forward pass: affine - ReLU - affine
        ar1_out, ar1_cache = affine_relu_forward(X, W1, b1)
        ar2_out, ar2_cache = affine_forward(ar1_out, W2, b2)

        scores = ar2_out

        if y is None:
            return scores

        # backward pass: softmax loss plus L2 regularization on W1 and W2
        loss, grads = 0, {}
        loss, dout = softmax_loss(scores, y)
        loss = loss + 0.5 * self.reg * np.sum(W1 * W1) + 0.5 * self.reg * np.sum(W2 * W2)
        dx2, dw2, db2 = affine_backward(dout, ar2_cache)
        grads['W2'] = dw2 + self.reg * W2
        grads['b2'] = db2
        dx1, dw1, db1 = affine_relu_backward(dx2, ar1_cache)
        grads['W1'] = dw1 + self.reg * W1
        grads['b1'] = db1

        return loss, grads
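The docstring above describes two calling modes. Here is a short smoke test, assuming a two-layer net instance named model that exposes this loss method; the model name, input dimension and class count are assumptions.

import numpy as np

N, D, C = 5, 3072, 10
X_batch = np.random.randn(N, D)
y_batch = np.random.randint(C, size=N)

scores = model.loss(X_batch)                 # test-time: scores of shape (N, C)
loss, grads = model.loss(X_batch, y_batch)   # training-time: scalar loss and gradient dict
print(loss, sorted(grads.keys()))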
Example 4
    def predict(self, X):
        """
            Inputs:
                - X: A numpy array of shape (N, D) giving N D-dimensional data points to
                     classify.
                Returns:
                - y_pred: A numpy array of shape (N,) giving predicted labels for each of
                          the elements of X. For all i, y_pred[i] = c means that X[i] is
                          predicted to have class c, where 0 <= c < C.
        """
        y_pred = None

        # h1 = layers.ReLU(np.dot(X, self.params['W1']) + self.params['b1'])
        # scores = np.dot(h1, self.params['W2']) + self.params['b2']
        # y_pred = np.argmax(scores, axis=1)
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # compute the forward pass

        a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                                   pool_param)
        a2, cache2 = layers.affine_relu_forward(a1, W2, b2)

        scores, cache3 = layers.affine_forward(a2, W3, b3)
        y_pred = np.argmax(scores, axis=1)

        return y_pred
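A common use of predict is to measure accuracy on held-out data. A short sketch, assuming validation arrays X_val and y_val (the names are assumptions):

y_pred = model.predict(X_val)
val_acc = np.mean(y_pred == y_val)   # fraction of correctly classified examples
print('validation accuracy: %.3f' % val_acc)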
Example 5
        def loss(self, X, y=None):
            """
            Compute loss and gradient for the fully-connected net.
            Input / output: Same as TwoLayerNet above.
            """
            X = X.astype(self.dtype)
            mode = 'test' if y is None else 'train'

            # Set train/test mode for batchnorm params and dropout param since they
            # behave differently during training and testing.
            if self.dropout_param is not None:
                self.dropout_param['mode'] = mode
            if self.use_batchnorm:
                for bn_param in self.bn_params:
                    bn_param['mode'] = mode

            scores = None
            ############################################################################
            # TODO: Implement the forward pass for the fully-connected net, computing  #
            # the class scores for X and storing them in the scores variable.          #
            #                                                                          #
            # When using dropout, you'll need to pass self.dropout_param to each       #
            # dropout forward pass.                                                    #
            #                                                                          #
            # When using batch normalization, you'll need to pass self.bn_params[0] to #
            # the forward pass for the first batch normalization layer, pass           #
            # self.bn_params[1] to the forward pass for the second batch normalization #
            # layer, etc.                                                              #
            ############################################################################
            layer_input = X
            ar_cache = {}
            dp_cache = {}

            for lay in range(self.num_layers - 1):
                if self.use_batchnorm:
                    layer_input, ar_cache[lay] = affine_bn_relu_forward(layer_input,
                                                                        self.params['W%d' % (lay + 1)],
                                                                        self.params['b%d' % (lay + 1)],
                                                                        self.params['gamma%d' % (lay + 1)],
                                                                        self.params['beta%d' % (lay + 1)],
                                                                        self.bn_params[lay])
                else:
                    layer_input, ar_cache[lay] = affine_relu_forward(layer_input, self.params['W%d' % (lay + 1)],
                                                                     self.params['b%d' % (lay + 1)])

                if self.use_dropout:
                    layer_input, dp_cache[lay] = dropout_forward(layer_input, self.dropout_param)

            ar_out, ar_cache[self.num_layers] = affine_forward(layer_input, self.params['W%d' % (self.num_layers)],
                                                               self.params['b%d' % (self.num_layers)])
            scores = ar_out
            # pass
            ############################################################################
            #                             END OF YOUR CODE                             #
            ############################################################################

            # If test mode return early
            if mode == 'test':
                return scores

            loss, grads = 0.0, {}
            ############################################################################
            # TODO: Implement the backward pass for the fully-connected net. Store the #
            # loss in the loss variable and gradients in the grads dictionary. Compute #
            # data loss using softmax, and make sure that grads[k] holds the gradients #
            # for self.params[k]. Don't forget to add L2 regularization!               #
            #                                                                          #
            # When using batch normalization, you don't need to regularize the scale   #
            # and shift parameters.                                                    #
            #                                                                          #
            # NOTE: To ensure that your implementation matches ours and you pass the   #
            # automated tests, make sure that your L2 regularization includes a factor #
            # of 0.5 to simplify the expression for the gradient.                      #
            ############################################################################
            loss, dscores = softmax_loss(scores, y)
            dhout = dscores
            loss = loss + 0.5 * self.reg * np.sum(
                self.params['W%d' % (self.num_layers)] * self.params['W%d' % (self.num_layers)])
            dx, dw, db = affine_backward(dhout, ar_cache[self.num_layers])
            grads['W%d' % (self.num_layers)] = dw + self.reg * self.params['W%d' % (self.num_layers)]
            grads['b%d' % (self.num_layers)] = db
            dhout = dx
            for idx in range(self.num_layers - 1):
                lay = self.num_layers - 1 - idx - 1
                loss = loss + 0.5 * self.reg * np.sum(self.params['W%d' % (lay + 1)] * self.params['W%d' % (lay + 1)])
                if self.use_dropout:
                    dhout = dropout_backward(dhout, dp_cache[lay])
                if self.use_batchnorm:
                    dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dhout, ar_cache[lay])
                else:
                    dx, dw, db = affine_relu_backward(dhout, ar_cache[lay])
                grads['W%d' % (lay + 1)] = dw + self.reg * self.params['W%d' % (lay + 1)]
                grads['b%d' % (lay + 1)] = db
                if self.use_batchnorm:
                    grads['gamma%d' % (lay + 1)] = dgamma
                    grads['beta%d' % (lay + 1)] = dbeta
                dhout = dx
            # pass
            ############################################################################
            #                             END OF YOUR CODE                             #
            ############################################################################

            return loss, grads
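Example 5 relies on affine_bn_relu_forward / affine_bn_relu_backward, which are referenced but not defined in the snippet. Below is a minimal sketch of that sandwich layer, assuming the usual affine, batch normalization and ReLU layer functions (and their backward counterparts) are in scope:

def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    # affine -> batch normalization -> ReLU, caching each stage for the backward pass
    a, fc_cache = affine_forward(x, w, b)
    bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn)
    return out, (fc_cache, bn_cache, relu_cache)

def affine_bn_relu_backward(dout, cache):
    # reverse the chain: ReLU -> batch normalization -> affine
    fc_cache, bn_cache, relu_cache = cache
    dbn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(dbn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta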