Code example #1
import random
import numpy as np
from cs231n.gradient_check import eval_numerical_gradient
from cs231n.layers import softmax_loss

def rel_error(x, y):
    # Standard relative-error helper from the cs231n notebooks.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def test_softmax(num_classes, samples=random.randrange(1, 10)):
    # `samples` is unused; its default is drawn once, at import time.
    num_classes, num_inputs = num_classes, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)
    # Compare the analytic gradient of softmax_loss with a numerical estimate.
    dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
    loss, dx = softmax_loss(x, y)
    assert dx_num.shape == dx.shape
    # With near-zero scores, the loss should be close to log(num_classes).
    assert -np.log(1.2 / num_classes) < loss < -np.log(0.8 / num_classes)
    assert rel_error(dx_num, dx) < 5e-7
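
Every example on this page calls softmax_loss(x, y) from cs231n/layers.py. For reference, the sketch below shows what that function is assumed to compute: the mean cross-entropy over the minibatch with a numerically stable softmax, together with its gradient with respect to the scores (a sketch of the usual cs231n convention, not the graded implementation itself).

import numpy as np

def softmax_loss(x, y):
    """Sketch of a cs231n-style softmax loss.

    x: scores of shape (N, C); y: integer labels of shape (N,).
    Returns (loss, dx), where dx has the same shape as x.
    """
    N = x.shape[0]
    shifted = x - np.max(x, axis=1, keepdims=True)  # subtract the row max for stability
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    probs = np.exp(log_probs)
    loss = -np.sum(log_probs[np.arange(N), y]) / N  # mean cross-entropy over the batch
    dx = probs.copy()
    dx[np.arange(N), y] -= 1  # softmax probabilities minus the one-hot labels
    dx /= N
    return loss, dx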
Code example #2
File: IMgrds.py Project: Hidenver2016/cs231n
def make_fooling_image(
        X, target_y,
        model):  # a method based on softmax loss and regularization
    """
  Generate a fooling image that is close to X, but that the model classifies
  as target_y.
  
  Inputs:
  - X: Input image, of shape (1, 3, 64, 64)
  - target_y: An integer in the range [0, 100)
  - model: A PretrainedCNN
  
  Returns:
  - X_fooling: An image that is close to X, but that is classified as target_y
    by the model.
  """
    X_fooling = X.copy()
    ##############################################################################
    # TODO: Generate a fooling image X_fooling that the model will classify as   #
    # the class target_y. Use gradient ascent on the target class score, using   #
    # the model.forward method to compute scores and the model.backward method   #
    # to compute image gradients.                                                #
    #                                                                            #
    # HINT: For most examples, you should be able to generate a fooling image    #
    # in fewer than 100 iterations of gradient ascent.                           #
    ##############################################################################
    N = X.shape[0]
    reg = 5e-5
    from cs231n.layers import softmax_loss
    for i in range(100):
        R = X_fooling - X
        scores, cache = model.forward(X_fooling, mode='test')
        loss, dscores = softmax_loss(scores, target_y)
        loss += 0.5 * reg * np.sum(R * R)
        print('softmax loss:', loss)
        y_pred = np.argmax(scores)
        print('target class index', target_y, 'current class index:', y_pred)
        if target_y == y_pred:
            print('iter num:', i)
            break
        else:
            df, _ = model.backward(dscores, cache)
            dX = reg * R + df
            X_fooling -= 6000 * dX
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################
    return X_fooling
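
For orientation, the loop above descends the softmax loss of the target class, plus an L2 penalty that keeps the fooling image close to the original, instead of ascending the raw target-class score suggested in the TODO; both drive the model toward predicting target_y. With the hard-coded constants above (step size 6000 and reg = 5e-5), each iteration performs

L(X') = -\log\frac{e^{s_{y_t}(X')}}{\sum_c e^{s_c(X')}} + \frac{\mathrm{reg}}{2}\,\lVert X' - X\rVert_2^2,
\qquad
X' \leftarrow X' - \eta\,\nabla_{X'} L(X'),

where s_c(X') are the scores returned by model.forward and \eta = 6000.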
Code example #3
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = "test" if y is None else "train"

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param["mode"] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param["mode"] = mode
        """
        loss 과정에서 활용할 리스트들
        [i] : i번째 layer의 변수들
        """
        fc = []
        relu = []
        bn = []
        dropout = []
        cache_bn = []
        cache_fc = []
        cache_relu = []
        cache_dropout = []
        fc.append(0)
        bn.append(0)
        relu.append(X)
        dropout.append(X)  # layer 1 reads the raw input, so seed this list with X as well
        cache_bn.append(0)
        cache_dropout.append(0)
        cache_fc.append(0)
        cache_relu.append(0)
        # Seed index 0 of every list: the activation lists (relu, dropout) start
        # from the training data X, the remaining lists hold a 0 placeholder.
        # Reason: it lets the loops below use layer indices 1 through L-1 directly.
        """
        fc_i : i번째 layer의 output
        cache_fc_i : i번째 layer의 input
        """
        for i in range(1, self.num_layers):  # 1부터 L-1까지
            # affine
            fc_i, cache_fc_i = affine_forward(relu[i - 1],
                                              self.params["W" + str(i)],
                                              self.params["b" + str(i)])
            fc.append(fc_i)
            cache_fc.append(cache_fc_i)
            if self.use_batchnorm:
                # batchnorm
                bn_i, cache_bn_i = batchnorm_forward(
                    fc_i,
                    gamma=self.params["gamma" + str(i)],
                    beta=self.params["beta" + str(i)],
                    bn_param=self.bn_params[i - 1],
                )
                bn.append(bn_i)
                cache_bn.append(cache_bn_i)
                # relu
                relu_i, cache_relu_i = relu_forward(bn_i)
                relu.append(relu_i)
                cache_relu.append(cache_relu_i)
            else:
                # relu
                relu_i, cache_relu_i = relu_forward(fc[i])
                relu.append(relu_i)
                cache_relu.append(cache_relu_i)

            # dropout layer
            if self.use_dropout:
                dropout_i, cache_dropout_i = dropout_forward(
                    relu_i, dropout_param=self.dropout_param)
                dropout.append(dropout_i)
                cache_dropout.append(cache_dropout_i)

        # Final layer (layer L): affine, followed by the softmax loss below
        fc_L, cache_fc_L = affine_forward(
            dropout[-1] if self.use_dropout else relu[-1],
            self.params["W" + str(self.num_layers)],
            self.params["b" + str(self.num_layers)])
        fc.append(fc_L)
        cache_fc.append(cache_fc_L)

        # (N,C)

        scores = fc[self.num_layers]
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == "test":
            return scores

        loss, grads = 0.0, {}
        loss, d_scores = softmax_loss(scores, y)

        dx_ = []
        dfc = []
        drelu = []
        dbatch = []
        ddropout = []

        # The very last layer
        drelu_L, dWL, dbL = affine_backward(d_scores,
                                            cache_fc[self.num_layers])
        dfc.append(d_scores)
        dx_.append(drelu_L)
        grads["W" + str(self.num_layers)] = dWL
        grads["b" + str(self.num_layers)] = dbL

        for i in range(self.num_layers - 1, 0,
                       -1):  # num_layers-1 down to 1: all hidden layers
            # dropout backward
            if self.use_dropout:
                ddropout_i = dropout_backward(dx_[-1], cache_dropout[i])
                ddropout.append(ddropout_i)

            # relu backward
            d_fc = relu_backward(ddropout[-1] if self.use_dropout else dx_[-1],
                                 cache_relu[i])

            # batch normalization
            if self.use_batchnorm:
                # The variable is named d_fc here, but it is really d_batch.
                dbatch.append(d_fc)
                d_fc, dgamma, dbeta = batchnorm_backward(dbatch[-1],
                                                         cache=cache_bn[i])
                grads["gamma" + str(i)] = dgamma
                grads["beta" + str(i)] = dbeta
                dfc.append(d_fc)
            else:
                dfc.append(d_fc)

            # affine backward
            dx, dw, db = affine_backward(dfc[-1], cache_fc[i])
            dx_.append(dx)
            grads["W" + str(i)] = dw
            grads["b" + str(i)] = db

        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # Add L2 regularization to the loss and to the weight gradients.
        for i in range(1, self.num_layers + 1):
            W = self.params["W" + str(i)]
            loss += 0.5 * self.reg * np.sum(W * W)
            grads["W" + str(i)] += self.reg * W
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
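
The NOTE about the factor of 0.5 that recurs in these TODO blocks is purely a convenience: differentiating the squared term cancels the 2, so each weight matrix contributes a clean reg * W term to its gradient, which is exactly what the examples add to grads['W' + str(i)].

L = L_{\mathrm{data}} + \frac{\mathrm{reg}}{2}\sum_{l}\lVert W_l\rVert_F^2
\quad\Longrightarrow\quad
\frac{\partial L}{\partial W_l} = \frac{\partial L_{\mathrm{data}}}{\partial W_l} + \mathrm{reg}\, W_l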
Code example #4
File: fc_net.py Project: yin-hong/cs231n-assignment
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        hidden1_out, h1_cache = affine_forward(X, self.params['W1'],
                                               self.params['b1'])
        relu_out, relu_cache = relu_forward(hidden1_out)
        scores, h2_cache = affine_forward(relu_out, self.params['W2'],
                                          self.params['b2'])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        """
        X_reshape=np.reshape(X,(X.shape[0],-1))
        num_trains=X.shape[0]
        loss,_=softmax_loss(scores,y)
        loss=loss+self.reg*0.5*(np.sum(self.params['W2']*self.params['W2'])+np.sum(self.params['W1']*self.params['W1']))
        softmax_output=np.exp(scores)/np.sum(np.exp(scores),axis=1).reshape(-1,1)
        softmax_output[range(num_trains),list(y)]=softmax_output[range(num_trains),list(y)]-1
        grads['b2']=np.zeros_like(self.params['b2'])
        grads['W2']=np.zeros_like(self.params['W2'])
        grads['b1']=np.zeros_like(self.params['b1'])
        grads['W1']=np.zeros_like(self.params['W1'])
        grads['b2']=np.sum(softmax_output,axis=0)
        grads['W2']=np.dot(relu_out.T,softmax_output)
        grads_b1_tmp=np.dot(softmax_output,self.params['W2'].T)
        tmp=(relu_out>0)*grads_b1_tmp
        grads['b1']=np.sum(tmp,axis=0)
        grads['W1']=np.dot(X_reshape.T,grads_b1_tmp)
        grads['W1']=grads['W1']/num_trains+self.reg*self.params['W1']
        grads['b1']=grads['b1']/num_trains
        grads['W2']=grads['W2']/num_trains+self.reg*self.params['W2']
        grads['b2']=grads['b2']/num_trains
        """
        num_trains = X.shape[0]
        loss, dscore = softmax_loss(scores, y)
        loss = loss + self.reg * 0.5 * (
            np.sum(self.params['W2'] * self.params['W2']) +
            np.sum(self.params['W1'] * self.params['W1']))
        grads_h2, grads_w2, grads_b2 = affine_backward(dout=dscore,
                                                       cache=h2_cache)
        grads_relu = relu_backward(grads_h2, relu_cache)
        grads_h1, grads_w1, grads_b1 = affine_backward(grads_relu, h1_cache)
        grads['W1'] = grads_w1 + self.reg * self.params['W1']
        grads['W2'] = grads_w2 + self.reg * self.params['W2']
        grads['b1'] = grads_b1
        grads['b2'] = grads_b2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        return loss, grads
Code example #5
File: fc_net.py Project: yin-hong/cs231n-assignment
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        """
        if self.use_dropout:
            self.dropout_param['mode']=mode
       """
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        X_temp = X
        affine_Input = list()
        relu_input = list()
        batchnorm_input = list()
        dropout_input = list()
        score_tmp = None
        for i in range(self.num_layers - 1):
            tmp, affine_input_tmp = affine_forward(
                X_temp, self.params['W' + str(i + 1)],
                self.params['b' + str(i + 1)])
            if self.use_batchnorm:
                tmp, batchnorm_cache = batchnorm_forward(
                    tmp, self.params['gamma' + str(i + 1)],
                    self.params['beta' + str(i + 1)], self.bn_params[i])
                batchnorm_input.append(batchnorm_cache)
            score_tmp, relu_input_tmp = relu_forward(tmp)
            if self.use_dropout:
                score_tmp, dropout_cache = dropout_forward(
                    score_tmp, self.dropout_param)
                dropout_input.append(dropout_cache)
            affine_Input.append(affine_input_tmp)
            relu_input.append(relu_input_tmp)
            X_temp = score_tmp
        scores, last_input_tmp = affine_forward(
            score_tmp, self.params['W' + str(self.num_layers)],
            self.params['b' + str(self.num_layers)])
        affine_Input.append(last_input_tmp)
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        if mode == 'test':
            return scores
        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        num_trains = X.shape[0]
        loss, dscores = softmax_loss(scores, y)
        weight_decay_sum = 0
        for i in range(self.num_layers):
            tmp = np.sum(self.params['W' + str(i + 1)] *
                         self.params['W' + str(i + 1)])
            weight_decay_sum = weight_decay_sum + tmp

        loss = loss + 0.5 * self.reg * weight_decay_sum
        dout = dscores
        for i in range(self.num_layers):
            dx, dw, db = affine_backward(dout, affine_Input[-(i + 1)])
            grads['W' +
                  str(self.num_layers - i)] = dw + self.reg * self.params[
                      'W' + str(self.num_layers - i)]
            grads['b' + str(self.num_layers - i)] = db
            if self.use_dropout and i != self.num_layers - 1:
                dx = dropout_backward(dx, dropout_input[-(i + 1)])
            if i != self.num_layers - 1:
                dout = relu_backward(dx, relu_input[-(i + 1)])
            if i != self.num_layers - 1 and self.use_batchnorm:
                dout, dgamma, dbeta = batchnorm_backward(
                    dout, batchnorm_input[-(i + 1)])
                grads['gamma' + str(self.num_layers - i - 1)] = dgamma
                grads['beta' + str(self.num_layers - i - 1)] = dbeta

        return loss, grads
Code example #6
File: fc_net.py Project: jrmontag/cs231n-1
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the
        # class scores for X and storing them in the scores variable.
        #######################################################################
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        N = X.shape[0]
        C = W2.shape[1]

        scores = np.zeros((N, C))

        X_hidden, cache1 = affine_relu_forward(X, W1, b1)
        scores, cache2 = affine_forward(X_hidden, W2, b2)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        #######################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the
        # loss in the loss variable and gradients in the grads dictionary.
        # Compute data loss using softmax, and make sure that grads[k]
        # holds the gradients for self.params[k]. Don't forget to add L2
        # regularization!
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        dx_hidden, dw2, db2 = affine_backward(dscores, cache2)
        grads["W2"] = dw2 + self.reg * W2
        grads["b2"] = db2

        dx, dw1, db1 = affine_relu_backward(dx_hidden, cache1)
        grads["W1"] = dw1 + self.reg * W1
        grads["b1"] = db1
        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
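
Loss methods like the one above are usually validated the same way as code example #1: compare the analytic gradients against numeric estimates. A minimal sketch, assuming the standard cs231n TwoLayerNet constructor and the cs231n.gradient_check helper (neither is shown on this page, so treat both as assumptions):

import numpy as np

from cs231n.classifiers.fc_net import TwoLayerNet
from cs231n.gradient_check import eval_numerical_gradient

# A tiny random problem so the numeric check runs quickly.
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

model = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C, reg=0.1)
loss, grads = model.loss(X, y)
for name in sorted(grads):
    # eval_numerical_gradient perturbs model.params[name] in place and
    # re-evaluates the loss, so the lambda can ignore its argument.
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
    print(name, np.max(np.abs(grad_num - grads[name])))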
Code example #7
File: fc_net.py Project: jrmontag/cs231n-1
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the fully-connected net,
        # computing the class scores for X and storing them in the scores
        # variable.
        #
        # When using dropout, you'll need to pass self.dropout_param to each
        # dropout forward pass.
        #
        # When using batch normalization, you'll need to pass self.bn_params[0]
        # to the forward pass for the first batch normalization layer,
        # pass self.bn_params[1] to the forward pass for the second batch
        # normalization layer, etc.
        #######################################################################
        IN = X

        caches = {}
        if self.use_dropout:
            dropout_caches = {}

        for l in range(self.num_layers - 1):
            W = self.params["W{}".format(l + 1)]
            b = self.params["b{}".format(l + 1)]

            if self.use_batchnorm:
                gamma = self.params["gamma{}".format(l + 1)]
                beta = self.params["beta{}".format(l + 1)]
                IN, cache = affine_batchnorm_relu_forward(
                    IN, W, b, gamma, beta, self.bn_params[l])
            else:
                IN, cache = affine_relu_forward(IN, W, b)

            caches[l] = cache

            if self.use_dropout:
                IN, d_cache = dropout_forward(IN, self.dropout_param)
                dropout_caches[l] = d_cache

        # forward pass: last affine layer
        num_last = self.num_layers
        name_W_last = "W{}".format(num_last)
        name_b_last = "b{}".format(num_last)
        W_last = self.params[name_W_last]
        b_last = self.params[name_b_last]

        scores, cache_last = affine_forward(IN, W_last, b_last)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        #######################################################################
        # TODO: Implement the backward pass for the fully-connected net.
        # Store the loss in the loss variable and gradients in the grads
        # dictionary. Compute data loss using softmax, and make sure that
        # grads[k] holds the gradients for self.params[k]. Don't forget to add
        # L2 regularization!
        #
        # When using batch normalization, you don't need to regularize the
        # scale and shift parameters.
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        # loss
        loss, dscores = softmax_loss(scores, y)

        # regularization loss
        for l in range(self.num_layers):
            W = self.params["W{}".format(l + 1)]
            loss += 0.5 * self.reg * np.sum(W * W)

        # backprop through last affine layer
        dx, dw, db = affine_backward(dscores, cache_last)
        grads[name_W_last] = dw + self.reg * W_last
        grads[name_b_last] = db

        # backprop through affine-batchnorm-relu layers
        for l in reversed(range(self.num_layers - 1)):
            name_W = "W{}".format(l + 1)
            name_b = "b{}".format(l + 1)

            if self.use_dropout:
                dx = dropout_backward(dx, dropout_caches[l])

            if self.use_batchnorm:
                dx, dw, db, dgamma, dbeta = affine_batchnorm_relu_backward(
                    dx, caches[l])
                grads["gamma{}".format(l + 1)] = dgamma
                grads["beta{}".format(l + 1)] = dbeta
            else:
                dx, dw, db = affine_relu_backward(dx, caches[l])
            grads[name_W] = dw + self.reg * self.params[name_W]
            grads[name_b] = db

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
Code example #8
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        combo_caches = []
        fc_cache = None

        N = X.shape[0]
        D = np.prod(X.shape[1:])
        x_ = X.reshape(N, D)

        # middle combo layers
        for layer in range(1, self.num_layers):  #[1, 2, ..., L-1]
            w = self.params['W' + str(layer)]
            b = self.params['b' + str(layer)]

            # prepare for batch normalization
            gamma, beta, bn_param = 1., 0, None
            if self.normalization == 'batchnorm':
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                bn_param = self.bn_params[layer - 1]  # zero based

            x_, cache = combo_forward(x_, w, b, gamma, beta, bn_param)
            combo_caches.append(cache)

        # final fully connected layer
        w = self.params['W' + str(self.num_layers)]
        b = self.params['b' + str(self.num_layers)]
        scores, fc_cache = affine_forward(x_, w, b)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        loss, dout = softmax_loss(scores, y)

        # final fully connected layer
        dout, dw, db = affine_backward(dout, fc_cache)
        grads['W' +
              str(self.num_layers
                  )] = dw + self.reg * self.params['W' + str(self.num_layers)]
        grads['b' + str(self.num_layers)] = db
        # add the L2 regularization term for the last weight matrix to the loss
        loss += 0.5 * self.reg * np.sum(self.params['W' + str(self.num_layers)]
                                        **2)

        # middle combo layers
        for layer in range(self.num_layers - 1, 0, -1):  # [L-1, L-2, ... ,1]

            dout, dw, db, dgamma, dbeta = combo_backward(
                dout, combo_caches[layer - 1])
            grads['W' +
                  str(layer)] = dw + self.reg * self.params['W' + str(layer)]
            grads['b' + str(layer)] = db

            if self.normalization == 'batchnorm':
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta

            # add the L2 regularization term for this layer's weights to the loss
            loss += 0.5 * self.reg * np.sum(self.params['W' + str(layer)]**2)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
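
combo_forward and combo_backward above are helpers local to this project. Judging only from how they are called (a single cache object, five return values from the backward pass, and gamma/beta/bn_param ignored when normalization is off), they presumably bundle affine -> optional batchnorm -> ReLU. A hedged sketch built from the standard cs231n layer functions:

from cs231n.layers import (affine_backward, affine_forward, batchnorm_backward,
                           batchnorm_forward, relu_backward, relu_forward)

def combo_forward(x, w, b, gamma, beta, bn_param):
    """Sketch: affine -> (optional batchnorm) -> ReLU, matching the calls above."""
    a, fc_cache = affine_forward(x, w, b)
    bn_cache = None
    if bn_param is not None:
        a, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, bn_cache, relu_cache)

def combo_backward(dout, cache):
    """Sketch of the matching backward pass; dgamma/dbeta stay None without batchnorm."""
    fc_cache, bn_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dgamma = dbeta = None
    if bn_cache is not None:
        da, dgamma, dbeta = batchnorm_backward(da, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta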
Code example #9
def two_layer_net(X, model, y=None, reg=0.0):
  """
  Compute the loss and gradients for a two layer fully connected neural network.
  The net has an input dimension of D, a hidden layer dimension of H, and
  performs classification over C classes. We use a softmax loss function and L2
  regularization on the weight matrices. The two layer net should use a ReLU
  nonlinearity after the first affine layer.

  The two layer net has the following architecture:

  input - fully connected layer - ReLU - fully connected layer - softmax

  The outputs of the second fully-connected layer are the scores for each
  class.

  Inputs:
  - X: Input data of shape (N, D). Each X[i] is a training sample.
  - model: Dictionary mapping parameter names to arrays of parameter values.
    It should contain the following:
    - W1: First layer weights; has shape (D, H)
    - b1: First layer biases; has shape (H,)
    - W2: Second layer weights; has shape (H, C)
    - b2: Second layer biases; has shape (C,)
  - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
    an integer in the range 0 <= y[i] < C. This parameter is optional; if it
    is not passed then we only return scores, and if it is passed then we
    instead return the loss and gradients.
  - reg: Regularization strength.

  Returns:
  If y is not passed, return a matrix scores of shape (N, C) where scores[i, c]
  is the score for class c on input X[i].

  If y is passed, instead return a tuple of:
  - loss: Loss (data loss and regularization loss) for this batch of training
    samples.
  - grads: Dictionary mapping parameter names to gradients of those parameters
    with respect to the loss function. This should have the same keys as model.
  """

  # unpack variables from the model dictionary
  W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
  N, D = X.shape

  # compute the forward pass
  scores = None
  #############################################################################
  # TODO: Perform the forward pass, computing the class scores for the input. #
  # Store the result in the scores variable, which should be an array of      #
  # shape (N, C).                                                             #
  #############################################################################
  # relu = lambda x: np.maximum(x,0)
  # H, C = W2.shape
  # scores = np.zeros((N,C))
  # layer1 = np.maximum(np.dot(X,W1) + b1,0)
  # scores = np.dot(layer1,W2) + b2
  ## above is the test implementation
  ## NOW, using cs231n/layers.py
  ## NOTICE define layer0 = X
  # then behaviour is 'functional' layer(n+1) = f(layer(n) | parameters)
  from cs231n.layers import affine_forward, relu_forward, softmax_loss
  from cs231n.layers import affine_backward, relu_backward

  layer1, cache1 = affine_forward(X, W1, b1)
  layer2, cache2 = relu_forward(layer1)
  layer3, cache3 = affine_forward(layer2, W2, b2)

  scores = layer3
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################
  
  # If the targets are not given then jump out, we're done
  if y is None:
    return scores

  # compute the loss
  loss = None
  #############################################################################
  # TODO: Finish the forward pass, and compute the loss. This should include  #
  # both the data loss and L2 regularization for W1 and W2. Store the result  #
  # in the variable loss, which should be a scalar. Use the Softmax           #
  # classifier loss. So that your results match ours, multiply the            #
  # regularization loss by 0.5                                                #
  #############################################################################
  # rows   = np.sum(np.exp(scores), axis=1)
  # layer4 = np.mean(-layer3[range(N), y] + np.log(rows))
  # loss   = layer4 + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
  # 
  loss, dx = softmax_loss(scores, y)
  loss += 0.5 * reg * np.sum(W1*W1) + 0.5 * reg * np.sum(W2 * W2)
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  # compute the gradients
  grads = {}
  #############################################################################
  # TODO: Compute the backward pass, computing the derivatives of the weights #
  # and biases. Store the results in the grads dictionary. For example,       #
  # grads['W1'] should store the gradient on W1, and be a matrix of same size #
  #############################################################################
  dlayer2, grads['W2'], grads['b2'] = affine_backward(dx, cache3)
  dlayer1                           = relu_backward(dlayer2, cache2)
  dLayer0, grads['W1'], grads['b1'] = affine_backward(dlayer1, cache1)

  #gradients need to have regularization term
  grads['W2'] += reg * W2
  grads['W1'] += reg * W1
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  return loss, grads
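
For reference, the forward pass and loss that two_layer_net implements can be written compactly as

h = \max(0,\; X W_1 + b_1), \qquad s = h W_2 + b_2,
\qquad
L = \frac{1}{N}\sum_{i=1}^{N} -\log\frac{e^{s_{i, y_i}}}{\sum_c e^{s_{i, c}}}
    \;+\; \frac{\mathrm{reg}}{2}\left(\lVert W_1\rVert_F^2 + \lVert W_2\rVert_F^2\right),

and the grads dictionary holds the derivatives of L with respect to W1, b1, W2 and b2.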
Code example #10
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        out_affine1, cache_affine1 = layers.affine_forward(
            X, self.params["W1"], self.params["b1"])
        out_relu1, cache_relu1 = layers.relu_forward(out_affine1)
        out_affine2, cache_affine2 = layers.affine_forward(
            out_relu1, self.params["W2"], self.params["b2"])
        # At prediction time there is no need to compute the SVM/softmax loss;
        # the caller just takes the argmax of the scores.
        scores = out_affine2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # in training, compute the loss and do backprop.
        loss, dloss = layers.softmax_loss(scores, y)
        # need to add regularization here...
        loss += 0.5 * self.reg * (np.sum(self.params["W1"]**2) +
                                  np.sum(self.params["W2"]**2))
        dout_affine2 = layers.affine_backward(dloss, cache_affine2)
        grads["W2"] = dout_affine2[1] + self.reg * self.params["W2"]
        grads["b2"] = dout_affine2[2]
        dout_relu1 = layers.relu_backward(dout_affine2[0], cache_relu1)
        dout_affine1 = layers.affine_backward(dout_relu1, cache_affine1)
        grads["W1"] = dout_affine1[1] + self.reg * self.params["W1"]
        grads["b1"] = dout_affine1[2]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Code example #11
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        caches = collections.defaultdict(list)
        out_layer = X

        for i in range(self.num_layers - 1):
            n = str(i + 1)

            # (zy) The learned parameters are for BN affine transformation used
            # in training, while the running average is used for prediction.
            if self.use_batchnorm:
                out_layer, cache = affine_bn_relu_forward(
                    out_layer, self.params["W" + n], self.params["b" + n],
                    self.params["gamma" + n], self.params["beta" + n],
                    self.bn_params[i])
                caches["affine_bn_relu"].append(cache)
            else:
                out_layer, cache = layers.affine_forward(
                    out_layer, self.params["W" + n], self.params["b" + n])
                caches["affine"].append(cache)

                out_layer, cache = layers.relu_forward(out_layer)
                caches["relu"].append(cache)

            if self.use_dropout:
                out_layer, cache = layers.dropout_forward(
                    out_layer, self.dropout_param)
                caches["drop"].append(cache)

        nn = str(self.num_layers)
        scores, cache = layers.affine_forward(out_layer, self.params["W" + nn],
                                              self.params["b" + nn])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dloss = layers.softmax_loss(scores, y)
        # for regularization
        if self.reg != 0:
            for k, v in self.params.items():
                # only include the w parameters, excluding gamma, beta and b
                if k.startswith("W"):
                    loss += 0.5 * self.reg * np.sum(v**2)

        # get the gradient
        out = layers.affine_backward(dloss, cache)
        dout, grads["W" + nn], grads["b" + nn] = out
        grads["W" + nn] += self.reg * cache[1]

        for i in range(self.num_layers - 2, -1, -1):
            n = str(i + 1)

            if self.use_dropout:
                dout = layers.dropout_backward(dout, caches["drop"][i])

            if self.use_batchnorm:
                out = affine_bn_relu_backward(dout,
                                              caches["affine_bn_relu"][i])
                dout, grads["W"+n], grads["b"+n], \
                    grads["gamma"+n], grads["beta"+n] = out
                grads["W" +
                      n] += self.reg * self.params["W" + n] if self.reg else 0

            else:
                dout = layers.relu_backward(dout, caches["relu"][i])

                out = layers.affine_backward(dout, caches["affine"][i])
                dout, grads["W" + n], grads["b" + n] = out
                # need to include regularization
                grads["W" + n] += self.reg * caches["affine"][i][1]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Code example #12
File: fc_net.py Project: leourbina/cs231n
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        A1, c1 = affine_relu_forward(X, W1, b1)
        # The output layer is a plain affine transform (no ReLU on the scores).
        Z2, c2 = affine_forward(A1, W2, b2)
        scores = Z2

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        loss, dout = softmax_loss(scores, y)
        dA1, dW2, db2 = affine_backward(dout, c2)
        dX, dW1, db1 = affine_relu_backward(dA1, c1)

        grads['W2'] = dW2 + self.reg * W2
        grads['W1'] = dW1 + self.reg * W1
        grads['b2'] = db2
        grads['b1'] = db1

        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Code example #13
File: fc_net.py Project: leourbina/cs231n
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        arg, caches = X, []

        for i in range(1, self.num_layers + 1):
            cache = {}

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            arg, cache['fc_cache'] = affine_forward(arg, W, b)

            # Only hidden layers get normalization, ReLU, and dropout; the final
            # (score) layer is a plain affine transform.
            if i != self.num_layers:
                if self.normalization:
                    gamma = self.params[f"gamma{i}"]
                    beta = self.params[f"beta{i}"]

                    normalize_forward = batchnorm_forward if self.normalization == 'batchnorm' else layernorm_forward
                    arg, cache['bn_cache'] = normalize_forward(arg, gamma, beta, self.bn_params[i-1])

                arg, cache['relu_cache'] = relu_forward(arg)

                if self.use_dropout:
                    arg, cache['dropout_cache'] = dropout_forward(arg, self.dropout_param)

            caches.append(cache)

        scores = arg

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the   #
        # scale and shift parameters.                                              #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        loss, dout = softmax_loss(scores, y)

        for i in range(self.num_layers, 0, -1):
            W = self.params[f"W{i}"]
            cache = caches[i-1]

            # Mirror the forward pass: only hidden layers have dropout, ReLU, and
            # normalization to backpropagate through.
            if i != self.num_layers:
                if self.use_dropout:
                    dout = dropout_backward(dout, cache['dropout_cache'])

                dout = relu_backward(dout, cache['relu_cache'])

                if self.normalization:
                    normalize_backward = batchnorm_backward if self.normalization == 'batchnorm' else layernorm_backward
                    dout, dgamma, dbeta = normalize_backward(dout, cache['bn_cache'])
                    grads[f"gamma{i}"] = dgamma
                    grads[f"beta{i}"] = dbeta

            dout, dw, db = affine_backward(dout, cache['fc_cache'])

            grads[f"W{i}"] = dw + self.reg * W
            grads[f"b{i}"] = db

            loss += 0.5 * self.reg * np.sum(W * W)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
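A loss method like the one above is normally validated with a numeric gradient check. Below is a minimal sketch of that check, assuming a FullyConnectedNet instance named model and a small batch X, y have already been built as in the assignment notebook, and that eval_numerical_gradient and rel_error are the notebook's usual helpers.

loss, grads = model.loss(X, y)
for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]   # the perturbation happens in model.params[name] in place
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))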
Code example #15
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1 = self.params["W1"]
        W2 = self.params["W2"]
        b1 = self.params["b1"]
        b2 = self.params["b2"]
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        fc_1, cache_fc_1 = affine_forward(X, W1, b1)         # (N, H)
        relu_1, cache_relu_1 = relu_forward(fc_1)            # (N, H)
        scores, cache_fc_2 = affine_forward(relu_1, W2, b2)  # (N, C)
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, d_scores = softmax_loss(scores, y)

        d_relu_1, d_W2, d_b2 = affine_backward(d_scores, cache_fc_2)
        d_fc_1 = relu_backward(d_relu_1, cache_relu_1)
        dx, d_W1, d_b1 = affine_backward(d_fc_1, cache_fc_1)

        grads["W1"] = d_W1
        grads["W2"] = d_W2
        grads["b1"] = d_b1
        grads["b2"] = d_b2

        loss += 0.5 * self.reg * \
            (np.sum(np.square(W1)) + np.sum(np.square(W2)))

        grads["W2"] += self.reg * W2
        grads["W1"] += self.reg * W1
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
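The 0.5 factor in the regularization term exists so that its gradient is exactly reg * W rather than 2 * reg * W. A quick, self-contained numeric check of that identity (illustrative only, not part of the assignment code):

import numpy as np

np.random.seed(0)
reg = 0.1
W = np.random.randn(4, 3)
reg_loss = lambda W: 0.5 * reg * np.sum(W * W)

# Central-difference gradient of the regularization term alone.
h = 1e-5
num_grad = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        old = W[i, j]
        W[i, j] = old + h; fp = reg_loss(W)
        W[i, j] = old - h; fm = reg_loss(W)
        W[i, j] = old
        num_grad[i, j] = (fp - fm) / (2 * h)

print(np.max(np.abs(num_grad - reg * W)))  # tiny, so d/dW [0.5 * reg * ||W||^2] = reg * W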
Code example #16
print('db error: ', rel_error(db_num, db))

np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                 x,
                                 verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
"""
np.random.seed(231)
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

std = 1e-3
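The "loss should be 2.3" check above comes from the fact that with near-zero scores the softmax distribution is roughly uniform over the C classes, so the cross-entropy loss is about -log(1/C):

import numpy as np
print(-np.log(1.0 / 10))  # ~2.3026 for num_classes = 10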
Code example #17
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        out = X
        caches = []
        for i in range(self.num_layers):
            w_name = 'W{}'.format(i)
            b_name = 'b{}'.format(i)

            w = self.params[w_name]
            b = self.params[b_name]

            if i == self.num_layers - 1:
                out, cache = layer_utils.affine_forward(out, w, b)
            else:
                out, cache = layer_utils.affine_relu_forward(out, w, b)

            caches.append(cache)

        scores = out

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        data_loss, dsoftmax = layers.softmax_loss(scores, y)

        reg_loss = 0
        for key in self.params.keys():
            if key.startswith('W'):
                w = self.params[key]
                reg_loss += self.reg * np.sum(w * w) * 0.5

        loss = data_loss + reg_loss

        dx = dsoftmax
        for i in reversed(range(self.num_layers)):
            w_name = 'W{}'.format(i)
            b_name = 'b{}'.format(i)

            if i == self.num_layers - 1:
                dx, dw, db = layer_utils.affine_backward(dx, caches[i])
            else:
                dx, dw, db = layer_utils.affine_relu_backward(dx, caches[i])

            grads[w_name] = dw
            grads[b_name] = db

        for key in self.params.keys():
            if key.startswith('W'):
                w = self.params[key]
                grads[key] += self.reg * w

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
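Examples #17 to #19 lean on the affine_relu_* helpers from layer_utils. A typical sketch of that composition, assuming affine_forward, relu_forward and their backward counterparts from the assignment's layers module (the repositories' actual helpers may differ slightly):

def affine_relu_forward(x, w, b):
    """Affine transform followed by a ReLU; the cache holds what each backward pass needs."""
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)

def affine_relu_backward(dout, cache):
    """Backward pass for the affine-ReLU composition."""
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    return affine_backward(da, fc_cache)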
Code example #18
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################

        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N = X.shape[0]
        D = np.prod(X.shape[1:])

        X_ = X.reshape(N, D)
        A, fc1_cache = affine_forward(X_, W1, b1)
        R, relu_cache = relu_forward(A)
        scores, fc2_cache = affine_forward(R, W2, b2)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        loss, dscores = softmax_loss(scores, y)
        dR, dW2, db2 = affine_backward(dscores, fc2_cache)
        dA = relu_backward(dR, relu_cache)
        dX, dW1, db1 = affine_backward(dA, fc1_cache)

        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        grads = {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2}

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
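Example #18 flattens X to shape (N, D) before the first affine layer. In the usual cs231n layers module this reshape happens inside affine_forward itself, so the explicit reshape is redundant but harmless. A rough sketch of that behaviour (an approximation, not the assignment's exact code):

import numpy as np

def affine_forward_sketch(x, w, b):
    """x of shape (N, d_1, ..., d_k) is flattened to (N, D) internally; w: (D, M); b: (M,)."""
    out = x.reshape(x.shape[0], -1).dot(w) + b
    return out, (x, w, b)

def affine_backward_sketch(dout, cache):
    x, w, b = cache
    dx = dout.dot(w.T).reshape(x.shape)             # gradient restored to the input's original shape
    dw = x.reshape(x.shape[0], -1).T.dot(dout)
    db = np.sum(dout, axis=0)
    return dx, dw, db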
Code example #19
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        # affine - relu - affine: the output layer is a plain affine transform so
        # that the class scores are unconstrained before the softmax.
        out_1, cache_1 = layer_utils.affine_relu_forward(
            X, self.params['W1'], self.params['b1'])
        scores, cache_2 = layers.affine_forward(
            out_1, self.params['W2'], self.params['b2'])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dsoftmax = layers.softmax_loss(scores, y)
        dl2, dw2, db2 = layers.affine_backward(dsoftmax, cache_2)
        _, dw1, db1 = layer_utils.affine_relu_backward(dl2, cache_1)

        # add regularization loss
        for w in [self.params['W1'], self.params['W2']]:
            loss += self.reg * np.sum(w * w) * 0.5

        grads['W1'] = dw1 + self.reg * self.params['W1']
        grads['W2'] = dw2 + self.reg * self.params['W2']
        grads['b1'] = db1
        grads['b2'] = db2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads