def test_relu_forward_multiple_dim(dim):
    """Check relu_forward on a random tensor with ``dim`` axes.

    Every axis length is drawn with ``np.random.randint(3, 8)`` and the input
    is standard-normal noise. The first element returned by ``relu_forward``
    must have the input's shape and match ``np.maximum(x, 0)``.

    Args:
        dim: Number of axes of the random input.
    """
    shape = tuple(np.random.randint(3, 8) for _ in range(dim))
    x = np.random.standard_normal(shape)

    # Build the reference before calling the layer and never clamp x itself:
    # clamping the input first would only feed already non-negative values to
    # relu_forward, so negative entries would never be exercised.
    expected = np.maximum(x, 0)
    out = relu_forward(x)[0]

    assert out.shape == x.shape
    assert rel_error(expected, out) < 5e-7
def test_relu_forward_multiple_dim(dim):
    """Exercise relu_forward on random input of rank ``dim``.

    The shape is built from ``dim`` draws of ``np.random.randint(3, 8)`` and
    the data comes from ``np.random.standard_normal``. The test asserts that
    the output keeps the input shape and equals the element-wise maximum of
    the input and zero.

    Args:
        dim: Rank (number of axes) of the generated input.
    """
    testing_shape = [np.random.randint(3, 8) for _ in range(dim)]
    x = np.random.standard_normal(tuple(testing_shape))

    # The reference is computed independently from the raw input. Zeroing the
    # negatives of x in place and then re-running the layer on it would make
    # the value check pass for any function that is the identity on
    # non-negative numbers.
    reference = np.maximum(x, 0)
    result = relu_forward(x)[0]

    assert x.shape == result.shape
    assert rel_error(reference, result) < 5e-7
def test_relu_forward():
    """Compare relu_forward with precomputed values on a fixed 3x4 input."""
    # Twelve evenly spaced samples in [-0.5, 0.5], arranged as 3 rows of 4.
    x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)
    out, _cache = relu_forward(x)

    expected_rows = [
        [0., 0., 0., 0.],
        [0., 0., 0.04545455, 0.13636364],
        [0.22727273, 0.31818182, 0.40909091, 0.5],
    ]
    correct_out = np.array(expected_rows)

    # Shapes must agree; the relative error is expected to be around 1e-8.
    assert out.shape == correct_out.shape
    assert rel_error(out, correct_out) < 5e-7
Exemple #4
0
        def train_loss(*args):
            """Run the forward pass over a flat positional parameter list.

            ``args[0]`` is the input batch and ``args[1]`` the labels. Layer
            weights, biases and batchnorm parameters are fetched from ``args``
            at the positions given by ``self.w_idx``, ``self.b_idx``,
            ``self.bn_ga_idx`` and ``self.bn_bt_idx``. ``self`` and ``mode``
            are taken from the enclosing scope.

            Returns the scores when ``mode == 'test'``, otherwise the value of
            ``svm_loss(scores, y)``.
            """
            X, y = args[0], args[1]

            activation = X
            for layer in xrange(self.num_layers):
                activation = affine_forward(activation,
                                            args[self.w_idx(layer)],
                                            args[self.b_idx(layer)])

                # The last layer is affine only; every earlier one is followed
                # by [batchnorm] -> relu -> [dropout].
                if layer >= (self.num_layers - 1):
                    continue

                if self.use_batchnorm:
                    activation = batchnorm_forward(
                        activation, args[self.bn_ga_idx(layer)],
                        args[self.bn_bt_idx(layer)], self.bn_params[layer])
                activation = relu_forward(activation)
                if self.use_dropout:
                    activation = dropout_forward(activation,
                                                 self.dropout_param)

            scores = activation
            if mode == 'test':
                return scores

            # Hinge (SVM) loss is used here rather than softmax_loss.
            return svm_loss(scores, y)
Exemple #5
0
    def train_loss(*args):
      """Forward pass driven by a flat argument tuple.

      args[0] holds the inputs and args[1] the labels; the remaining entries
      are indexed through self.w_idx / self.b_idx / self.bn_ga_idx /
      self.bn_bt_idx. Both `self` and `mode` come from the enclosing scope.

      In 'test' mode the scores are returned; otherwise the result of
      svm_loss(scores, y) is returned.
      """
      X, y = args[0], args[1]

      out = X
      for idx in xrange(self.num_layers):
        weights = args[self.w_idx(idx)]
        bias = args[self.b_idx(idx)]
        out = affine_forward(out, weights, bias)

        is_hidden = idx < (self.num_layers - 1)
        if is_hidden:
          # Hidden layers: optional batchnorm, then relu, then optional dropout.
          if self.use_batchnorm:
            out = batchnorm_forward(out, args[self.bn_ga_idx(idx)],
                                    args[self.bn_bt_idx(idx)],
                                    self.bn_params[idx])
          out = relu_forward(out)
          if self.use_dropout:
            out = dropout_forward(out, self.dropout_param)

      scores = out
      if mode == 'test':
        return scores

      # The loss is the SVM loss; softmax_loss is not used here.
      return svm_loss(scores, y)
def test_relu_forward():
    """Check relu_forward on a 3x4 grid against hard-coded reference values."""
    # Input: 12 points evenly spaced over [-0.5, 0.5], laid out as 3x4.
    grid = np.linspace(-0.5, 0.5, num=12)
    x = grid.reshape(3, 4)

    out, _ = relu_forward(x)

    # Reference output, listed row by row and reshaped to match the input.
    reference_values = [
        0., 0., 0., 0.,
        0., 0., 0.04545455, 0.13636364,
        0.22727273, 0.31818182, 0.40909091, 0.5,
    ]
    correct_out = np.array(reference_values).reshape(3, 4)

    # The error against the reference should be around 1e-8.
    assert out.shape == correct_out.shape
    assert rel_error(out, correct_out) < 5e-7
Exemple #7
0
def affine_relu_forward(x, w, b):
  """
  Convenience layer that performs an affine transform followed by a ReLU.

  Inputs:
  - x: Input to the affine layer
  - w, b: Weights for the affine layer

  Returns:
  - out: The value relu_forward produces for the affine output. This variant
    returns that value alone; no cache tuple is assembled here.
  """
  return relu_forward(affine_forward(x, w, b))
Exemple #8
0
def affine_relu_forward(x, w, b):
    """Chain affine_forward and relu_forward.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns:
    - The result of relu_forward applied to affine_forward(x, w, b). Only
      that single value is returned; this function builds no cache.
    """
    affine_out = affine_forward(x, w, b)
    return relu_forward(affine_out)
Exemple #9
0
def conv_relu_forward(x, w, b, conv_param):
    """
    Convolution followed by a ReLU, bundled as one layer.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (conv_cache, relu_cache), to be handed to the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    out, relu_cache = relu_forward(conv_out)
    return out, (conv_cache, relu_cache)
Exemple #10
0
def conv_relu_forward(x, w, b, conv_param):
    """Run conv_forward_fast and pass its output through relu_forward.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Pair (conv_cache, relu_cache) for the backward pass
    """
    pre_activation, conv_cache = conv_forward_fast(x, w, b, conv_param)
    activated = relu_forward(pre_activation)
    out, relu_cache = activated
    cache = (conv_cache, relu_cache)
    return out, cache
Exemple #11
0
def affine_batchnorm_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs Affine -> BatchNorm -> ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta, bn_param: Passed through to batchnorm_forward

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (fc_cache, bn_cache, relu_cache) for the backward pass
    """
    affine_out, fc_cache = affine_forward(x, w, b)
    # A dedicated name is used for the normalized activations so the bias
    # parameter `b` is not rebound.
    normalized, bn_cache = batchnorm_forward(affine_out, gamma, beta, bn_param)
    out, relu_cache = relu_forward(normalized)
    return out, (fc_cache, bn_cache, relu_cache)
Exemple #12
0
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs an affine transform, batch normalization
    and then a ReLU activation, using the functions of the `layers` module.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta, bn_param: Forwarded to layers.batchnorm_forward

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (fc_cache, bn_cache, relu_cache) for the backward pass
    """
    affine_out, fc_cache = layers.affine_forward(x, w, b)
    bn_out, bn_cache = layers.batchnorm_forward(affine_out, gamma, beta,
                                                bn_param)
    out, relu_cache = layers.relu_forward(bn_out)
    return out, (fc_cache, bn_cache, relu_cache)
Exemple #13
0
def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """
    Convolution, ReLU and max-pool chained into a single layer.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: (conv_cache, relu_cache, pool_cache) for the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    relu_out, relu_cache = relu_forward(conv_out)
    out, pool_cache = max_pool_forward_fast(relu_out, pool_param)
    return out, (conv_cache, relu_cache, pool_cache)
Exemple #14
0
def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """Apply conv_forward_fast, relu_forward and max_pool_forward_fast in turn.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Triple (conv_cache, relu_cache, pool_cache) for the backward pass
    """
    caches = []

    activation, conv_cache = conv_forward_fast(x, w, b, conv_param)
    caches.append(conv_cache)

    activation, relu_cache = relu_forward(activation)
    caches.append(relu_cache)

    out, pool_cache = max_pool_forward_fast(activation, pool_param)
    caches.append(pool_cache)

    return out, tuple(caches)
Exemple #15
0
def combo_forward(x, w, b, gamma, beta, bn_param):
    """
    Combo layer forward: FC -> [BN] -> ReLU

    Batch normalization is applied only when bn_param is not None.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta, bn_param: Passed to batchnorm_forward when bn_param is set

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (fc_cache, bn_cache, relu_cache); bn_cache is None when batch
      normalization was skipped
    """
    hidden, fc_cache = affine_forward(x, w, b)

    if bn_param is None:
        bn_cache = None
    else:
        hidden, bn_cache = batchnorm_forward(hidden, gamma, beta, bn_param)

    out, relu_cache = relu_forward(hidden)
    return out, (fc_cache, bn_cache, relu_cache)
Exemple #16
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        The forward pass applies affine -> [batchnorm] -> relu -> [dropout]
        for each of the first ``num_layers - 1`` layers and finishes with one
        affine layer that produces the scores.

        Inputs:
        - X: Input data; cast to ``self.dtype`` before use.
        - y: Labels, or None. With None the net runs in 'test' mode.

        Returns:
        - scores, when y is None;
        - otherwise a tuple (loss, grads): loss is the softmax loss plus
          0.5 * reg * sum(W * W) over all weight matrices, and grads maps
          parameter names ('W1', 'b1', 'gamma1', ...) to their gradients.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params since they behave
        # differently during training and testing.
        # NOTE(review): the matching switch for self.dropout_param['mode'] was
        # disabled in the original code (wrapped in a bare string literal), so
        # dropout keeps whatever mode dropout_param already carries. Left
        # unchanged here; confirm that this is intended.
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        # ------------------------------------------------------------------
        # Forward pass. One cache list per layer type; each list is filled in
        # layer order so the backward pass can index it by layer number.
        # ------------------------------------------------------------------
        affine_caches = []
        bn_caches = []
        relu_caches = []
        dropout_caches = []

        hidden = X
        for i in range(self.num_layers - 1):
            idx = str(i + 1)
            hidden, fc_cache = affine_forward(
                hidden, self.params['W' + idx], self.params['b' + idx])
            affine_caches.append(fc_cache)
            if self.use_batchnorm:
                hidden, bn_cache = batchnorm_forward(
                    hidden, self.params['gamma' + idx],
                    self.params['beta' + idx], self.bn_params[i])
                bn_caches.append(bn_cache)
            hidden, relu_cache = relu_forward(hidden)
            relu_caches.append(relu_cache)
            if self.use_dropout:
                hidden, drop_cache = dropout_forward(hidden,
                                                     self.dropout_param)
                dropout_caches.append(drop_cache)

        # Final affine layer. It consumes the running activation, which is X
        # itself when the net has no hidden layers (num_layers == 1); the
        # previous code passed None in that case.
        last = str(self.num_layers)
        scores, fc_cache = affine_forward(
            hidden, self.params['W' + last], self.params['b' + last])
        affine_caches.append(fc_cache)

        if mode == 'test':
            return scores

        # ------------------------------------------------------------------
        # Loss: softmax data loss plus L2 regularization with a 0.5 factor.
        # ------------------------------------------------------------------
        loss, dout = softmax_loss(scores, y)
        weight_decay_sum = 0
        for i in range(self.num_layers):
            W = self.params['W' + str(i + 1)]
            weight_decay_sum = weight_decay_sum + np.sum(W * W)
        loss = loss + 0.5 * self.reg * weight_decay_sum

        # ------------------------------------------------------------------
        # Backward pass, from the last affine layer down to the first.
        # ------------------------------------------------------------------
        grads = {}
        for layer in range(self.num_layers, 0, -1):
            name = str(layer)
            dx, dw, db = affine_backward(dout, affine_caches[layer - 1])
            grads['W' + name] = dw + self.reg * self.params['W' + name]
            grads['b' + name] = db

            if layer == 1:
                # First layer: nothing upstream to propagate into.
                break

            # dx is the gradient w.r.t. the output of hidden layer
            # `layer - 1`; undo its dropout, relu and batchnorm in reverse
            # order of the forward pass.
            prev = layer - 1
            if self.use_dropout:
                dx = dropout_backward(dx, dropout_caches[prev - 1])
            dout = relu_backward(dx, relu_caches[prev - 1])
            if self.use_batchnorm:
                dout, dgamma, dbeta = batchnorm_backward(
                    dout, bn_caches[prev - 1])
                # Scale and shift parameters are not regularized.
                grads['gamma' + str(prev)] = dgamma
                grads['beta' + str(prev)] = dbeta

        return loss, grads
Exemple #17
0
def two_layer_net(X, model, y=None, reg=0.0):
  """
  Compute the loss and gradients for a two layer fully connected neural network.

  The net uses a softmax loss and L2 regularization on the weight matrices,
  with a ReLU nonlinearity after the first affine layer:

  input - fully connected layer - ReLU - fully connected layer - softmax

  The outputs of the second fully-connected layer are the scores for each
  class.

  Inputs:
  - X: Input data of shape (N, D). Each X[i] is a training sample.
  - model: Dictionary mapping parameter names to arrays of parameter values.
    It should contain the following:
    - W1: First layer weights; has shape (D, H)
    - b1: First layer biases; has shape (H,)
    - W2: Second layer weights; has shape (H, C)
    - b2: Second layer biases; has shape (C,)
  - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
    an integer in the range 0 <= y[i] < C. This parameter is optional; if it
    is not passed then only the scores are returned, and if it is passed then
    the loss and gradients are returned instead.
  - reg: Regularization strength.

  Returns:
  If y is None, the scores produced by the second affine layer.

  If y is given, a tuple of:
  - loss: Loss (data loss and regularization loss) for this batch of training
    samples.
  - grads: Dictionary with keys 'W1', 'b1', 'W2', 'b2' mapping parameter
    names to gradients of those parameters with respect to the loss.
  """
  # Unpack the parameters. Unpacking X.shape into two names also rejects any
  # X that is not two-dimensional.
  W1, b1 = model['W1'], model['b1']
  W2, b2 = model['W2'], model['b2']
  N, D = X.shape

  # The layer primitives are imported locally, as in the original code.
  from cs231n.layers import affine_forward, relu_forward, softmax_loss
  from cs231n.layers import affine_backward, relu_backward

  # Forward pass: affine -> ReLU -> affine.
  hidden_pre, fc1_cache = affine_forward(X, W1, b1)
  hidden, relu_cache = relu_forward(hidden_pre)
  scores, fc2_cache = affine_forward(hidden, W2, b2)

  # If the targets are not given then jump out, we're done.
  if y is None:
    return scores

  # Softmax data loss plus L2 regularization (with a 0.5 factor) on W1 and W2.
  loss, dscores = softmax_loss(scores, y)
  reg_loss = 0.5 * reg * np.sum(W1*W1) + 0.5 * reg * np.sum(W2 * W2)
  loss += reg_loss

  # Backward pass, mirroring the forward pass in reverse.
  grads = {}
  dhidden, grads['W2'], grads['b2'] = affine_backward(dscores, fc2_cache)
  dhidden_pre = relu_backward(dhidden, relu_cache)
  _, grads['W1'], grads['b1'] = affine_backward(dhidden_pre, fc1_cache)

  # Add the regularization term to the weight gradients, in place.
  for key in ('W2', 'W1'):
    grads[key] += reg * model[key]

  return loss, grads
Exemple #18
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        The model is affine -> ReLU -> affine; every sample of X is flattened
        to a row vector before the first affine layer.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss (softmax loss plus
          0.5 * reg * (sum(W1 * W1) + sum(W2 * W2)))
        - grads: Dictionary with keys 'W1', 'b1', 'W2', 'b2' mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Collapse all trailing axes of X into one feature axis.
        num_samples = X.shape[0]
        flat_dim = np.prod(X.shape[1:])
        flat_X = X.reshape(num_samples, flat_dim)

        # Forward pass.
        hidden_pre, fc1_cache = affine_forward(flat_X, W1, b1)
        hidden, relu_cache = relu_forward(hidden_pre)
        scores, fc2_cache = affine_forward(hidden, W2, b2)

        # If y is None then we are in test mode so just return scores.
        if y is None:
            return scores

        # Backward pass through the three layers in reverse order.
        loss, dscores = softmax_loss(scores, y)
        dhidden, dW2, db2 = affine_backward(dscores, fc2_cache)
        dhidden_pre = relu_backward(dhidden, relu_cache)
        _, dW1, db1 = affine_backward(dhidden_pre, fc1_cache)

        # L2 regularization: 0.5 factor in the loss, plain reg * W in the
        # gradients (added in place).
        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        return loss, {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2}
Exemple #19
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Each of the first ``num_layers - 1`` layers is either the composite
        affine_bn_relu layer (when batchnorm is enabled) or an affine layer
        followed by a ReLU, optionally followed by dropout. A final affine
        layer produces the scores.

        Inputs:
        - X: Input data; cast to ``self.dtype`` before use.
        - y: Labels, or None. With None the net runs in 'test' mode.

        Returns:
        - scores, when y is None;
        - otherwise a tuple (loss, grads) with the softmax loss plus L2
          regularization on the 'W*' parameters, and a dictionary mapping
          parameter names to gradients.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Dropout and batchnorm behave differently during training and
        # testing, so propagate the current mode to their parameter dicts.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        # ---------------- forward pass ----------------
        # Caches are grouped by layer kind; within a group they are stored in
        # layer order, so index i belongs to hidden layer i + 1.
        caches = collections.defaultdict(list)
        hidden = X

        for idx in range(self.num_layers - 1):
            name = str(idx + 1)
            W, b = self.params["W" + name], self.params["b" + name]

            if self.use_batchnorm:
                hidden, layer_cache = affine_bn_relu_forward(
                    hidden, W, b, self.params["gamma" + name],
                    self.params["beta" + name], self.bn_params[idx])
                caches["affine_bn_relu"].append(layer_cache)
            else:
                hidden, layer_cache = layers.affine_forward(hidden, W, b)
                caches["affine"].append(layer_cache)

                hidden, layer_cache = layers.relu_forward(hidden)
                caches["relu"].append(layer_cache)

            if self.use_dropout:
                hidden, layer_cache = layers.dropout_forward(
                    hidden, self.dropout_param)
                caches["drop"].append(layer_cache)

        last = str(self.num_layers)
        scores, final_cache = layers.affine_forward(
            hidden, self.params["W" + last], self.params["b" + last])

        # If test mode return early.
        if mode == 'test':
            return scores

        # ---------------- loss ----------------
        loss, dscores = layers.softmax_loss(scores, y)
        if self.reg != 0:
            # Only the W parameters are regularized (not gamma, beta or b).
            for key, value in self.params.items():
                if key.startswith("W"):
                    loss += 0.5 * self.reg * np.sum(value**2)

        # ---------------- backward pass ----------------
        grads = {}
        dout, grads["W" + last], grads["b" + last] = layers.affine_backward(
            dscores, final_cache)
        # NOTE(review): index 1 of an affine cache is used as the weight
        # matrix here and below -- confirm against layers.affine_forward.
        grads["W" + last] += self.reg * final_cache[1]

        for idx in reversed(range(self.num_layers - 1)):
            name = str(idx + 1)

            # Dropout was applied last in the forward pass, so undo it first.
            if self.use_dropout:
                dout = layers.dropout_backward(dout, caches["drop"][idx])

            if self.use_batchnorm:
                layer_grads = affine_bn_relu_backward(
                    dout, caches["affine_bn_relu"][idx])
                (dout, grads["W" + name], grads["b" + name],
                 grads["gamma" + name], grads["beta" + name]) = layer_grads
                grads["W" + name] += (self.reg * self.params["W" + name]
                                      if self.reg else 0)
            else:
                dout = layers.relu_backward(dout, caches["relu"][idx])

                layer_grads = layers.affine_backward(dout,
                                                     caches["affine"][idx])
                dout, grads["W" + name], grads["b" + name] = layer_grads
                # Regularization term for this layer's weights.
                grads["W" + name] += self.reg * caches["affine"][idx][1]

        return loss, grads
Exemple #20
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        arg, caches = X, []

        for i in range(1, self.num_layers + 1):
            cache = {}

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            arg, cache['fc_cache'] = affine_forward(arg, W, b)

            if i != self.num_layers and self.normalization:
                gamma = self.params[f"gamma{i}"]
                beta = self.params[f"beta{i}"]

                normalize_forward = batchnorm_forward if self.normalization is 'batchnorm' else layernorm_forward
                arg, cache['bn_cache'] = normalize_forward(arg, gamma, beta, self.bn_params[i-1])

            arg, cache['relu_cache'] = relu_forward(arg)

            if self.use_dropout:
                arg, cache['dropout_cache'] = dropout_forward(arg, self.dropout_param)

            caches.append(cache)

        scores = arg

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        loss, dout = softmax_loss(scores, y)

        for i in range(self.num_layers, 0, -1):
            W = self.params[f"W{i}"]
            cache = caches[i-1]

            if self.use_dropout:
                dout = dropout_backward(dout, cache['dropout_cache'])

            da = relu_backward(dout, cache['relu_cache'])

            if i != self.num_layers and self.normalization:
                normalize_backward = batchnorm_backward if self.normalization is 'batchnorm' else layernorm_backward
                da, dgamma, dbeta = batchnorm_backward(da, cache['bn_cache'])
                grads[f"gamma{i}"] = dgamma
                grads[f"beta{i}"] = dbeta

            dout, dw, db = affine_backward(da, cache['fc_cache'])

            grads[f"W{i}"] = dw + self.reg * W
            grads[f"b{i}"] = db

            loss += 0.5 * self.reg * np.sum(W * W)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
# Numerical gradient of the affine layer's output with respect to the bias b,
# evaluated against the upstream gradient dout.
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b,
                                       dout)

# Analytic gradients: rerun the forward pass to obtain the cache, then
# backpropagate the same upstream gradient through the layer.
_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# Report how far the analytic gradients are from the numerical ones.
# dx_num and dw_num are expected to be defined before this excerpt.
# The error should be around 1e-10
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

# Test the relu_forward function
x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

out, _ = relu_forward(x)
correct_out = np.array([[
    0.,
    0.,
    0.,
    0.,
], [
    0.,
    0.,
    0.04545455,
    0.13636364,
], [
    0.22727273,
    0.31818182,
    0.40909091,
    0.5,
Exemple #22
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        The network is affine -> ReLU -> affine, scored with a softmax loss
        plus L2 regularization (factor 0.5) on W1 and W2.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: keep every layer's cache for the backward pass.
        hidden1_out, h1_cache = affine_forward(X, W1, b1)
        relu_out, relu_cache = relu_forward(hidden1_out)
        scores, h2_cache = affine_forward(relu_out, W2, b2)

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        # Data loss from softmax plus the L2 penalty on both weight matrices.
        loss, dscore = softmax_loss(scores, y)
        loss = loss + self.reg * 0.5 * (np.sum(W2 * W2) + np.sum(W1 * W1))

        # Backward pass: walk the layers in reverse order.
        grads_h2, grads_w2, grads_b2 = affine_backward(dout=dscore,
                                                       cache=h2_cache)
        grads_relu = relu_backward(grads_h2, relu_cache)
        _, grads_w1, grads_b1 = affine_backward(grads_relu, h1_cache)

        # Only the weights are regularized; the biases are not.
        grads = {
            'W1': grads_w1 + self.reg * W1,
            'W2': grads_w2 + self.reg * W2,
            'b1': grads_b1,
            'b2': grads_b2,
        }
        return loss, grads
Exemple #23
0
    def loss(self, X, y=None):
        """
        Run the two-layer net on a minibatch and, when labels are given, also
        return the loss and parameter gradients.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        - When y is None (test time): scores of shape (N, C), where
          scores[i, c] is the classification score for X[i] and class c.
        - Otherwise (training time): a tuple (loss, grads), where loss is a
          scalar and grads maps each key of self.params to the gradient of
          the loss with respect to that parameter.
        """
        params = self.params
        reg = self.reg

        # Forward pass: affine -> ReLU -> affine. The output of the second
        # affine layer is used directly as the class scores.
        hidden, fc1_cache = layers.affine_forward(X, params["W1"],
                                                  params["b1"])
        activated, relu_cache = layers.relu_forward(hidden)
        scores, fc2_cache = layers.affine_forward(activated, params["W2"],
                                                  params["b2"])

        # Test mode: no labels, so there is nothing to backpropagate.
        if y is None:
            return scores

        # Softmax data loss plus the L2 penalty (with the 0.5 factor).
        loss, dscores = layers.softmax_loss(scores, y)
        loss += 0.5 * reg * (np.sum(params["W1"]**2) +
                             np.sum(params["W2"]**2))

        # Backward pass through the three layers in reverse order.
        dactivated, dW2, db2 = layers.affine_backward(dscores, fc2_cache)
        dhidden = layers.relu_backward(dactivated, relu_cache)
        _, dW1, db1 = layers.affine_backward(dhidden, fc1_cache)

        grads = {}
        grads["W2"] = dW2 + reg * params["W2"]
        grads["b2"] = db2
        grads["W1"] = dW1 + reg * params["W1"]
        grads["b1"] = db1

        return loss, grads
Exemple #24
0
    def loss(self, X, y=None):
        """
        Evaluate the two-layer net (affine -> ReLU -> affine) on a minibatch.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        - When y is None: scores of shape (N, C), where scores[i, c] is the
          classification score for X[i] and class c.
        - Otherwise: a tuple (loss, grads) with the scalar softmax loss plus
          L2 regularization (factor 0.5), and a dictionary mapping each key
          of self.params to the gradient of the loss for that parameter.
        """
        import copy

        W1 = self.params["W1"]
        W2 = self.params["W2"]
        b1 = self.params["b1"]
        b2 = self.params["b2"]

        # Forward pass; shapes noted for N samples, H hidden units, C classes.
        hidden, hidden_cache = affine_forward(X, W1, b1)  # (N, H)
        activated, activation_cache = relu_forward(hidden)  # (N, H)
        logits, logits_cache = affine_forward(activated, W2, b2)  # (N, C)

        # The scores are handed out as an independent copy of the last
        # layer's output.
        scores = copy.deepcopy(logits)

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        # Softmax data loss and its gradient with respect to the scores.
        loss, d_scores = softmax_loss(scores, y)

        # Backward pass through the layers in reverse order.
        d_activated, d_W2, d_b2 = affine_backward(d_scores, logits_cache)
        d_hidden = relu_backward(d_activated, activation_cache)
        _, d_W1, d_b1 = affine_backward(d_hidden, hidden_cache)

        # L2 regularization with the 0.5 factor; only weights are penalized.
        loss += 0.5 * self.reg * (np.sum(np.square(W1)) +
                                  np.sum(np.square(W2)))

        # Add the penalty gradient in place on the weight gradients.
        d_W2 += self.reg * W2
        d_W1 += self.reg * W1

        grads = {"W1": d_W1, "W2": d_W2, "b1": d_b1, "b2": d_b2}
        return loss, grads
Exemple #25
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.

        Every hidden layer i in 1..num_layers-1 applies
        affine -> [batchnorm] -> relu -> [dropout], and each stage feeds the
        next one. The last layer is a plain affine layer whose output is the
        class scores.

        Inputs:
        - X: Input data; cast to self.dtype before use.
        - y: Labels handed to softmax_loss, or None for a test-time pass.

        Returns:
        - If y is None: the scores produced by the last affine layer.
        - Otherwise: a tuple (loss, grads). loss is the softmax data loss plus
          0.5 * reg * sum(W * W) over every weight matrix. grads holds an
          entry for each "W<i>" and "b<i>", plus "gamma<i>" and "beta<i>"
          when batch normalization is enabled.
        """
        X = X.astype(self.dtype)
        mode = "test" if y is None else "train"

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param["mode"] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param["mode"] = mode

        last = self.num_layers

        # ----- Forward pass over the hidden layers (1 .. L-1) -----
        # `out` always holds the output of the most recent stage, so the
        # dropout output (when enabled) is what the next affine layer sees.
        out = X
        hidden_caches = []
        for i in range(1, last):
            idx = str(i)
            out, fc_cache = affine_forward(out, self.params["W" + idx],
                                           self.params["b" + idx])

            bn_cache = None
            if self.use_batchnorm:
                # bn_params is 0-indexed while layers are 1-indexed.
                out, bn_cache = batchnorm_forward(
                    out,
                    gamma=self.params["gamma" + idx],
                    beta=self.params["beta" + idx],
                    bn_param=self.bn_params[i - 1],
                )

            out, relu_cache = relu_forward(out)

            dropout_cache = None
            if self.use_dropout:
                out, dropout_cache = dropout_forward(
                    out, dropout_param=self.dropout_param)

            hidden_caches.append(
                (fc_cache, bn_cache, relu_cache, dropout_cache))

        # Last layer L: affine only; its output is the class scores.
        scores, last_fc_cache = affine_forward(out,
                                               self.params["W" + str(last)],
                                               self.params["b" + str(last)])

        # If test mode return early
        if mode == "test":
            return scores

        # ----- Backward pass -----
        grads = {}
        loss, dout = softmax_loss(scores, y)

        dout, dw, db = affine_backward(dout, last_fc_cache)
        grads["W" + str(last)] = dw
        grads["b" + str(last)] = db

        # Hidden layers in reverse order, undoing each stage of the forward
        # pass: dropout -> relu -> batchnorm -> affine.
        for i in range(last - 1, 0, -1):
            idx = str(i)
            fc_cache, bn_cache, relu_cache, dropout_cache = hidden_caches[i - 1]

            if self.use_dropout:
                dout = dropout_backward(dout, dropout_cache)

            dout = relu_backward(dout, relu_cache)

            if self.use_batchnorm:
                dout, dgamma, dbeta = batchnorm_backward(dout, cache=bn_cache)
                grads["gamma" + idx] = dgamma
                grads["beta" + idx] = dbeta

            dout, dw, db = affine_backward(dout, fc_cache)
            grads["W" + idx] = dw
            grads["b" + idx] = db

        # ----- L2 regularization (factor 0.5) on the weights only -----
        # The batchnorm scale/shift parameters and the biases are not
        # regularized.
        # NOTE(review): assumes the constructor stores the regularization
        # strength as self.reg — confirm against __init__.
        for i in range(1, last + 1):
            key = "W" + str(i)
            W = self.params[key]
            loss += 0.5 * self.reg * (W * W).sum()
            grads[key] = grads[key] + self.reg * W

        return loss, grads