Beispiel #1
0
def rnn_step_full_forward(x, prev_h, Wx, Wh, bh, Ws, bs, y, mask):
    current_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + bh)
    current_score, cache_affine = affine_forward(current_h, Ws, bs)
    current_loss, dscore = softmax_loss_mask(current_score, y, mask)
#     current_loss, dscore = softmax_loss(current_score, y)
    cache = (x, prev_h, Wx, Wh, bh, Ws, bs, current_h, cache_affine, dscore)
    return current_h, current_loss, cache
Beispiel #2
0
    def train_loss(*args):
      X = args[0]
      y = args[1]

      res = X
      for l in xrange(self.num_layers):
        prev_res = res
        res = affine_forward(prev_res, args[self.w_idx(l)], args[self.b_idx(l)])

        if l < (self.num_layers - 1):
          if self.use_batchnorm:
            res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                    args[self.bn_bt_idx(l)], self.bn_params[l])
          res = relu_forward(res)
          if self.use_dropout:
            res = dropout_forward(res, self.dropout_param)

      scores = res

      if mode == 'test':
        return scores

      #loss, _ = softmax_loss(scores, y)
      loss = svm_loss(scores, y)
      return loss
Beispiel #3
0
        def train_loss(*args):
            X = args[0]
            y = args[1]

            res = X
            for l in xrange(self.num_layers):
                prev_res = res
                res = affine_forward(prev_res, args[self.w_idx(l)],
                                     args[self.b_idx(l)])

                if l < (self.num_layers - 1):
                    if self.use_batchnorm:
                        res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                                args[self.bn_bt_idx(l)],
                                                self.bn_params[l])
                    res = relu_forward(res)
                    if self.use_dropout:
                        res = dropout_forward(res, self.dropout_param)

            scores = res

            if mode == 'test':
                return scores

            #loss, _ = softmax_loss(scores, y)
            loss = svm_loss(scores, y)
            return loss
Beispiel #4
0
 def train_loss(X, y, W1, W2, b1, b2):
     l1 = affine_relu_forward(X, W1, b1)
     l2 = affine_forward(l1, W2, b2)
     scores = l2
     if y:
         #[TODO]: softmax is not supported yet
         # loss, d_scores = softmax_loss(scores, y)
         loss = svm_loss(scores, y)
         loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
             W2**2) * 0.5 * self.reg
     return loss_with_reg
def test_affine_backward():
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

    _, cache = affine_forward(x, w, b)
    dx, dw, db = affine_backward(dout, cache)

    assert dx.shape == dx.shape
    assert dw.shape == dw.shape
    assert db.shape == db.shape

    assert rel_error(dx_num,dx) < 5e-7
    assert rel_error(dw_num,dw) < 5e-7
    assert rel_error(db_num,db) < 5e-7
Beispiel #6
0
def test_affine_backward():
	x = np.random.randn(10, 2, 3)
	w = np.random.randn(6, 5)
	b = np.random.randn(5)
	dout = np.random.randn(10, 5)

	dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
	dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
	db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

	_, cache = affine_forward(x, w, b)
	dx, dw, db = affine_backward(dout, cache)

	assert dx.shape == dx.shape
	assert dw.shape == dw.shape
	assert db.shape == db.shape

	assert rel_error(dx_num,dx) < 5e-7
	assert rel_error(dw_num,dw) < 5e-7
	assert rel_error(db_num,db) < 5e-7
Beispiel #7
0
        def train_loss(X, y, W1, W2, b1, b2):
            l1 = affine_relu_forward(X, W1, b1)
            l2 = affine_forward(l1, W2, b2)
            scores = l2

            if y is None:
                return scores

            #[TODO]: softmax is not supported yet
            # loss, d_scores = softmax_loss(scores, y)
            loss = svm_loss(scores, y)
            loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
                W2**2) * 0.5 * self.reg

            return loss_with_reg
Beispiel #8
0
def affine_relu_forward(x, w, b):
  """
  Convenience layer that perorms an affine transform followed by a ReLU

  Inputs:
  - x: Input to the affine layer
  - w, b: Weights for the affine layer

  Returns a tuple of:
  - out: Output from the ReLU
  - cache: Object to give to the backward pass
  """
  a = affine_forward(x, w, b)
  out = relu_forward(a)
  return out
Beispiel #9
0
def affine_relu_forward(x, w, b):
    """
  Convenience layer that perorms an affine transform followed by a ReLU

  Inputs:
  - x: Input to the affine layer
  - w, b: Weights for the affine layer

  Returns a tuple of:
  - out: Output from the ReLU
  - cache: Object to give to the backward pass
  """
    a = affine_forward(x, w, b)
    out = relu_forward(a)
    return out
Beispiel #10
0
def lstm_step_forward(x, prev_h, prev_c, Wx, Wh, b):
    """
    Forward pass for a single timestep of an LSTM.

    The input data has dimension D, the hidden state has dimension H, and we use
    a minibatch size of N.

    Note that a sigmoid() function has already been provided for you in this file.

    Inputs:
    - x: Input data, of shape (N, D)
    - prev_h: Previous hidden state, of shape (N, H)
    - prev_c: previous cell state, of shape (N, H)
    - Wx: Input-to-hidden weights, of shape (D, 4H)
    - Wh: Hidden-to-hidden weights, of shape (H, 4H)
    - b: Biases, of shape (4H,)

    Returns a tuple of:
    - next_h: Next hidden state, of shape (N, H)
    - next_c: Next cell state, of shape (N, H)
    - cache: Tuple of values needed for backward pass.
    """
    next_h, next_c, cache = None, None, None
    #############################################################################
    # TODO: Implement the forward pass for a single timestep of an LSTM.        #
    # You may want to use the numerically stable sigmoid implementation above.  #
    #############################################################################
    N, D = x.shape
    _, H = prev_h.shape
    W = np.concatenate((Wx, Wh), axis=0)
    inputs = np.concatenate((x, prev_h), axis=1)
    gates, cache_gates = affine_forward(inputs, W, b)  # N x 4H
    i, f, o, g = np.split(gates, 4, axis=1)
    i = sigmoid(i)  # N x H
    f = sigmoid(f)
    o = sigmoid(o)
    g = np.tanh(g)
    next_c = f * prev_c + i * g
    tanh_next_c = np.tanh(next_c)
    next_h = o * tanh_next_c
    cache = (o, tanh_next_c, prev_c, cache_gates, i, f, o, g, D)
    ##############################################################################
    #                               END OF YOUR CODE                             #
    ##############################################################################

    return next_h, next_c, cache
Beispiel #11
0
def affine_batchnorm_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs Affine->BatchNorm->ReLU

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)
    b, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(b)
    cache = (fc_cache, bn_cache, relu_cache)
    return out, cache
Beispiel #12
0
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that perorms an affine transform, batch normalization and
    then a Relu activation.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    out, fc_cache = layers.affine_forward(x, w, b)
    out, bn_cache = layers.batchnorm_forward(out, gamma, beta, bn_param)
    out, relu_cache = layers.relu_forward(out)
    cache = fc_cache, bn_cache, relu_cache,
    return out, cache
Beispiel #13
0
def test_affine_forward():
	num_inputs = 2
	input_shape = (4, 5, 6)
	output_dim = 3

	input_size = num_inputs * np.prod(input_shape)
	weight_size = output_dim * np.prod(input_shape)

	x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
	w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
	b = np.linspace(-0.3, 0.1, num=output_dim)

	out, _ = affine_forward(x, w, b)
	correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
	                        [ 3.25553199,  3.5141327,   3.77273342]])

	# Compare your output with ours. The error should be around 1e-9.
	assert out.shape == correct_out.shape
	assert rel_error(out, correct_out) < 5e-7
Beispiel #14
0
def test_affine_forward():
    num_inputs = 2
    input_shape = (4, 5, 6)
    output_dim = 3

    input_size = num_inputs * np.prod(input_shape)
    weight_size = output_dim * np.prod(input_shape)

    x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
    w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
    b = np.linspace(-0.3, 0.1, num=output_dim)

    out, _ = affine_forward(x, w, b)
    correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                            [ 3.25553199,  3.5141327,   3.77273342]])

    # Compare your output with ours. The error should be around 1e-9.
    assert out.shape == correct_out.shape
    assert rel_error(out, correct_out) < 5e-7
Beispiel #15
0
def combo_forward(x, w, b, gamma, beta, bn_param):
    """
    Combo layer forward: FC -> BN -> ReLU
    
    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    bn_cache = None

    a, fc_cache = affine_forward(x, w, b)
    if bn_param is not None:
        a, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, bn_cache, relu_cache)
    return out, cache
Beispiel #16
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        combo_caches = []
        fc_cache = None

        N = X.shape[0]
        D = np.prod(X.shape[1:])
        x_ = X.reshape(N, D)

        # middle combo layers
        for layer in range(1, self.num_layers):  #[1, 2, ..., L-1]
            w = self.params['W' + str(layer)]
            b = self.params['b' + str(layer)]

            # prepare for batch normalization
            gamma, beta, bn_parma = 1., 0, None
            if self.normalization == 'batchnorm':
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                bn_parma = self.bn_params[layer - 1]  # zero based

            x_, cache = combo_forward(x_, w, b, gamma, beta, bn_parma)
            combo_caches.append(cache)

        # final fully connected layer
        w = self.params['W' + str(self.num_layers)]
        b = self.params['b' + str(self.num_layers)]
        scores, fc_cache = affine_forward(x_, w, b)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        loss, dout = softmax_loss(scores, y)

        # finall fully connected layer
        dout, dw, db = affine_backward(dout, fc_cache)
        grads['W' +
              str(self.num_layers
                  )] = dw + self.reg * self.params['W' + str(self.num_layers)]
        grads['b' + str(self.num_layers)] = db
        # adjust loss with regularization term of dWL
        loss += 0.5 * self.reg * np.sum(self.params['W' + str(self.num_layers)]
                                        **2)

        # middle combo layers
        for layer in range(self.num_layers - 1, 0, -1):  # [L-1, L-2, ... ,1]

            dout, dw, db, dgamma, dbeta = combo_backward(
                dout, combo_caches[layer - 1])
            grads['W' +
                  str(layer)] = dw + self.reg * self.params['W' + str(layer)]
            grads['b' + str(layer)] = db

            if self.normalization == 'batchnorm':
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta

            # adjust loss with regularization term of dWl
            loss += 0.5 * self.reg * np.sum(self.params['W' + str(layer)]**2)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #17
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        arg, caches = X, []

        for i in range(1, self.num_layers + 1):
            cache = {}

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            arg, cache['fc_cache'] = affine_forward(arg, W, b)

            if i != self.num_layers and self.normalization:
                gamma = self.params[f"gamma{i}"]
                beta = self.params[f"beta{i}"]

                normalize_forward = batchnorm_forward if self.normalization is 'batchnorm' else layernorm_forward
                arg, cache['bn_cache'] = normalize_forward(arg, gamma, beta, self.bn_params[i-1])

            arg, cache['relu_cache'] = relu_forward(arg)

            if self.use_dropout:
                arg, cache['dropout_cache'] = dropout_forward(arg, self.dropout_param)

            caches.append(cache)

        scores = arg

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        loss, dout = softmax_loss(scores, y)

        for i in range(self.num_layers, 0, -1):
            W = self.params[f"W{i}"]
            cache = caches[i-1]

            if self.use_dropout:
                dout = dropout_backward(dout, cache['dropout_cache'])

            da = relu_backward(dout, cache['relu_cache'])

            if i != self.num_layers and self.normalization:
                normalize_backward = batchnorm_backward if self.normalization is 'batchnorm' else layernorm_backward
                da, dgamma, dbeta = batchnorm_backward(da, cache['bn_cache'])
                grads[f"gamma{i}"] = dgamma
                grads[f"beta{i}"] = dbeta

            dout, dw, db = affine_backward(da, cache['fc_cache'])

            grads[f"W{i}"] = dw + self.reg * W
            grads[f"b{i}"] = db

            loss += 0.5 * self.reg * np.sum(W * W)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
# Load the (preprocessed) CIFAR10 data.
data = get_CIFAR10_data()
for k, v in list(data.items()):
    print(('%s:' % k, v.shape))

# Test the affine_forward function
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3
input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape),
                                                    output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)
out, _ = affine_forward(x, w, b)
correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                        [3.25553199, 3.5141327, 3.77273342]])
# Compare your output with ours. The error should be around 1e-9.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x,
                                       dout)
Beispiel #19
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param[mode] = mode

        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the fully-connected net,
        # computing the class scores for X and storing them in the scores
        # variable.
        #
        # When using dropout, you'll need to pass self.dropout_param to each
        # dropout forward pass.
        #
        # When using batch normalization, you'll need to pass self.bn_params[0]
        # to the forward pass for the first batch normalization layer,
        # pass self.bn_params[1] to the forward pass for the second batch
        # normalization layer, etc.
        #######################################################################
        IN = X

        caches = {}
        if self.use_dropout:
            dropout_caches = {}

        for l in range(self.num_layers - 1):
            W = self.params["W{}".format(l + 1)]
            b = self.params["b{}".format(l + 1)]

            if self.use_batchnorm:
                gamma = self.params["gamma{}".format(l + 1)]
                beta = self.params["beta{}".format(l + 1)]
                IN, cache = affine_batchnorm_relu_forward(
                    IN, W, b, gamma, beta, self.bn_params[l])
            else:
                IN, cache = affine_relu_forward(IN, W, b)

            caches[l] = cache

            if self.use_dropout:
                IN, d_cache = dropout_forward(IN, self.dropout_param)
                dropout_caches[l] = d_cache

        # forward pass: last affine layer
        num_last = self.num_layers
        name_W_last = "W{}".format(num_last)
        name_b_last = "b{}".format(num_last)
        W_last = self.params[name_W_last]
        b_last = self.params[name_b_last]

        scores, cache_last = affine_forward(IN, W_last, b_last)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        #######################################################################
        # TODO: Implement the backward pass for the fully-connected net.
        # Store the loss in the loss variable and gradients in the grads
        # dictionary. Compute data loss using softmax, and make sure that
        # grads[k] holds the gradients for self.params[k]. Don't forget to add
        # L2 regularization!
        #
        # When using batch normalization, you don't need to regularize the
        # scale and shift parameters.
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        # loss
        loss, dscores = softmax_loss(scores, y)

        # regularization loss
        for l in range(self.num_layers):
            W = self.params["W{}".format(l + 1)]
            loss += 0.5 * self.reg * np.sum(W * W)

        # backprop through last affine layer
        dx, dw, db = affine_backward(dscores, cache_last)
        grads[name_W_last] = dw + self.reg * W_last
        grads[name_b_last] = db

        # backprop through affine-batchnorm-relu layers
        for l in reversed(range(self.num_layers - 1)):
            name_W = "W{}".format(l + 1)
            name_b = "b{}".format(l + 1)

            if self.use_dropout:
                dx = dropout_backward(dx, dropout_caches[l])

            if self.use_batchnorm:
                dx, dw, db, dgamma, dbeta = affine_batchnorm_relu_backward(
                    dx, caches[l])
                grads["gamma{}".format(l + 1)] = dgamma
                grads["beta{}".format(l + 1)] = dbeta
            else:
                dx, dw, db = affine_relu_backward(dx, caches[l])
            grads[name_W] = dw + self.reg * self.params[name_W]
            grads[name_b] = db

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
Beispiel #20
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = "test" if y is None else "train"

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param["mode"] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param["mode"] = mode
        """
        loss 과정에서 활용할 리스트들
        [i] : i번째 layer의 변수들
        """
        fc = []
        relu = []
        bn = []
        dropout = []
        cache_bn = []
        cache_fc = []
        cache_relu = []
        cache_dropout = []
        fc.append(0)
        bn.append(0)
        relu.append(X)
        dropout.append(0)
        cache_bn.append(0)
        cache_dropout.append(0)
        cache_fc.append(0)
        cache_relu.append(0)
        # 맨 처음 trian data X를 집어넣어준다
        # 0으로 모든 리스트를 초기화해준다
        # 이러한 작업을 해주는 이유 : 인덱스를 1부터 L-1까지 활용하기 위함
        """
        fc_i : i번째 layer의 output
        cache_fc_i : i번째 layer의 input
        """
        for i in range(1, self.num_layers):  # 1부터 L-1까지
            # affine
            fc_i, cache_fc_i = affine_forward(relu[i - 1],
                                              self.params["W" + str(i)],
                                              self.params["b" + str(i)])
            fc.append(fc_i)
            cache_fc.append(cache_fc_i)
            if self.use_batchnorm:
                # batchnorm
                bn_i, cache_bn_i = batchnorm_forward(
                    fc_i,
                    gamma=self.params["gamma" + str(i)],
                    beta=self.params["beta" + str(i)],
                    bn_param=self.bn_params[i - 1],
                )
                bn.append(bn_i)
                cache_bn.append(cache_bn_i)
                # relu
                relu_i, cache_relu_i = relu_forward(bn_i)
                relu.append(relu_i)
                cache_relu.append(cache_relu_i)
            else:
                # relu
                relu_i, cache_relu_i = relu_forward(fc[i])
                relu.append(relu_i)
                cache_relu.append(cache_relu_i)

            # dropout layer
            if self.use_dropout:
                dropout_i, cache_dropout_i = dropout_forward(
                    relu_i, dropout_param=self.dropout_param)
                dropout.append(dropout_i)
                cache_dropout.append(cache_dropout_i)

        # 마지막 L번째 layer : affine & softmax
        fc_L, cache_fc_L = affine_forward(
            dropout[-1] if self.use_dropout else relu[-1],
            self.params["W" + str(self.num_layers)],
            self.params["b" + str(self.num_layers)])
        fc.append(fc_L)
        cache_fc.append(cache_fc_L)

        # (N,C)

        scores = fc[self.num_layers]
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == "test":
            return scores

        loss, grads = 0.0, {}
        loss, d_scores = softmax_loss(scores, y)

        dx_ = []
        dfc = []
        drelu = []
        dbatch = []
        ddropout = []

        # 맨 마지막 Layer
        drelu_L, dWL, dbL = affine_backward(d_scores,
                                            cache_fc[self.num_layers])
        dfc.append(d_scores)
        dx_.append(drelu_L)
        grads["W" + str(self.num_layers)] = dWL
        grads["b" + str(self.num_layers)] = dbL

        for i in range(self.num_layers - 1, 0,
                       -1):  # N-1, 1 : all hidden layer
            # dropout backward
            if self.use_dropout:
                ddropout_i = dropout_backward(dx_[-1], cache_dropout[i])
                ddropout.append(ddropout_i)

            # relu backward
            d_fc = relu_backward(ddropout[-1] if self.use_dropout else dx_[-1],
                                 cache_relu[i])

            # batch normalization
            if self.use_batchnorm:
                # vriable name = d_fc이지만 사실은 d_batch
                dbatch.append(d_fc)
                # print('i = ', i)
                # print('length of cache_bn = ', len(cache_bn))
                d_fc, dgamma, dbeta = batchnorm_backward(dbatch[-1],
                                                         cache=cache_bn[i])
                grads["gamma" + str(i)] = dgamma
                grads["beta" + str(i)] = dbeta
                dfc.append(d_fc)
            else:
                dfc.append(d_fc)

            # affine backward
            dx, dw, db = affine_backward(dfc[-1], cache_fc[i])
            dx_.append(dx)
            grads["W" + str(i)] = dw
            grads["b" + str(i)] = db
            # if (i == 1):
            # print(i)

        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #21
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        caches = collections.defaultdict(list)
        out_layer = X

        for i in range(self.num_layers - 1):
            n = str(i + 1)

            # (zy) The learned parameters are for BN affine transformation used
            # in training, while the running average is used for prediction.
            if self.use_batchnorm:
                out_layer, cache = affine_bn_relu_forward(
                    out_layer, self.params["W" + n], self.params["b" + n],
                    self.params["gamma" + n], self.params["beta" + n],
                    self.bn_params[i])
                caches["affine_bn_relu"].append(cache)
            else:
                out_layer, cache = layers.affine_forward(
                    out_layer, self.params["W" + n], self.params["b" + n])
                caches["affine"].append(cache)

                out_layer, cache = layers.relu_forward(out_layer)
                caches["relu"].append(cache)

            if self.use_dropout:
                out_layer, cache = layers.dropout_forward(
                    out_layer, self.dropout_param)
                caches["drop"].append(cache)

        nn = str(self.num_layers)
        scores, cache = layers.affine_forward(out_layer, self.params["W" + nn],
                                              self.params["b" + nn])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dloss = layers.softmax_loss(scores, y)
        # for regularization
        if self.reg != 0:
            for k, v in self.params.items():
                # only include the w parameters, excluding gamma, beta and b
                if k.startswith("W"):
                    loss += 0.5 * self.reg * np.sum(v**2)

        # get the gradient
        out = layers.affine_backward(dloss, cache)
        dout, grads["W" + nn], grads["b" + nn] = out
        grads["W" + nn] += self.reg * cache[1]

        for i in range(self.num_layers - 2, -1, -1):
            n = str(i + 1)

            if self.use_dropout:
                dout = layers.dropout_backward(dout, caches["drop"][i])

            if self.use_batchnorm:
                out = affine_bn_relu_backward(dout,
                                              caches["affine_bn_relu"][i])
                dout, grads["W"+n], grads["b"+n], \
                    grads["gamma"+n], grads["beta"+n] = out
                grads["W" +
                      n] += self.reg * self.params["W" + n] if self.reg else 0

            else:
                dout = layers.relu_backward(dout, caches["relu"][i])

                out = layers.affine_backward(dout, caches["affine"][i])
                dout, grads["W" + n], grads["b" + n] = out
                # need to include regularization
                grads["W" + n] += self.reg * caches["affine"][i][1]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #22
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        hidden1_out, h1_cache = affine_forward(X, self.params['W1'],
                                               self.params['b1'])
        relu_out, relu_cache = relu_forward(hidden1_out)
        scores, h2_cache = affine_forward(relu_out, self.params['W2'],
                                          self.params['b2'])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        """
        X_reshape=np.reshape(X,(X.shape[0],-1))
        num_trains=X.shape[0]
        loss,_=softmax_loss(scores,y)
        loss=loss+self.reg*0.5*(np.sum(self.params['W2']*self.params['W2'])+np.sum(self.params['W1']*self.params['W1']))
        softmax_output=np.exp(scores)/np.sum(np.exp(scores),axis=1).reshape(-1,1)
        softmax_output[range(num_trains),list(y)]=softmax_output[range(num_trains),list(y)]-1
        grads['b2']=np.zeros_like(self.params['b2'])
        grads['W2']=np.zeros_like(self.params['W2'])
        grads['b1']=np.zeros_like(self.params['b1'])
        grads['W1']=np.zeros_like(self.params['W1'])
        grads['b2']=np.sum(softmax_output,axis=0)
        grads['W2']=np.dot(relu_out.T,softmax_output)
        grads_b1_tmp=np.dot(softmax_output,self.params['W2'].T)
        tmp=(relu_out>0)*grads_b1_tmp
        grads['b1']=np.sum(tmp,axis=0)
        grads['W1']=np.dot(X_reshape.T,grads_b1_tmp)
        grads['W1']=grads['W1']/num_trains+self.reg*self.params['W1']
        grads['b1']=grads['b1']/num_trains
        grads['W2']=grads['W2']/num_trains+self.reg*self.params['W2']
        grads['b2']=grads['b2']/num_trains
        """
        num_trains = X.shape[0]
        loss, dscore = softmax_loss(scores, y)
        loss = loss + self.reg * 0.5 * (
            np.sum(self.params['W2'] * self.params['W2']) +
            np.sum(self.params['W1'] * self.params['W1']))
        grads_h2, grads_w2, grads_b2 = affine_backward(dout=dscore,
                                                       cache=h2_cache)
        grads_relu = relu_backward(grads_h2, relu_cache)
        grads_h1, grads_w1, grads_b1 = affine_backward(grads_relu, h1_cache)
        grads['W1'] = grads_w1 + self.reg * self.params['W1']
        grads['W2'] = grads_w2 + self.reg * self.params['W2']
        grads['b1'] = grads_b1
        grads['b2'] = grads_b2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        return loss, grads
Beispiel #23
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        out_affine1, cache_affine1 = layers.affine_forward(
            X, self.params["W1"], self.params["b1"])
        out_relu1, cache_relu1 = layers.relu_forward(out_affine1)
        out_affine2, cache_affine2 = layers.affine_forward(
            out_relu1, self.params["W2"], self.params["b2"])
        # no need to compute SVM/softmax loss, just give the argmax result When
        # we are in prediction.
        scores = out_affine2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # in training, compute the loss and do backprop.
        loss, dloss = layers.softmax_loss(scores, y)
        # need to add regularization here...
        loss += 0.5 * self.reg * (np.sum(self.params["W1"]**2) +
                                  np.sum(self.params["W2"]**2))
        dout_affine2 = layers.affine_backward(dloss, cache_affine2)
        grads["W2"] = dout_affine2[1] + self.reg * self.params["W2"]
        grads["b2"] = dout_affine2[2]
        dout_relu1 = layers.relu_backward(dout_affine2[0], cache_relu1)
        dout_affine1 = layers.affine_backward(dout_relu1, cache_affine1)
        grads["W1"] = dout_affine1[1] + self.reg * self.params["W1"]
        grads["b1"] = dout_affine1[2]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #24
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the
        # class scores for X and storing them in the scores variable.
        #######################################################################
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        N = X.shape[0]
        C = W2.shape[1]

        scores = np.zeros((N, C))

        X_hidden, cache1 = affine_relu_forward(X, W1, b1)
        scores, cache2 = affine_forward(X_hidden, W2, b2)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        #######################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the
        # loss in the loss variable and gradients in the grads dictionary.
        # Compute data loss using softmax, and make sure that grads[k]
        # holds the gradients for self.params[k]. Don't forget to add L2
        # regularization!
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        dx_hidden, dw2, db2 = affine_backward(dscores, cache2)
        grads["W2"] = dw2 + self.reg * W2
        grads["b2"] = db2

        dx, dw1, db1 = affine_relu_backward(dx_hidden, cache1)
        grads["W1"] = dw1 + self.reg * W1
        grads["b1"] = db1
        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
Beispiel #25
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################

        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N = X.shape[0]
        D = np.prod(X.shape[1:])

        X_ = X.reshape(N, D)
        A, fc1_cache = affine_forward(X_, W1, b1)
        R, relu_cache = relu_forward(A)
        scores, fc2_cache = affine_forward(R, W2, b2)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        loss, dscores = softmax_loss(scores, y)
        dR, dW2, db2 = affine_backward(dscores, fc2_cache)
        dA = relu_backward(dR, relu_cache)
        dX, dW1, db1 = affine_backward(dA, fc1_cache)

        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        grads = {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2}

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #26
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1 = self.params["W1"]
        W2 = self.params["W2"]
        b1 = self.params["b1"]
        b2 = self.params["b2"]
        fc_1, cache_fc_1 = affine_forward(X, W1, b1)  # (N, H)
        relu_1, cache_relu_1 = relu_forward(fc_1)  # (N, H)
        fc_2, cache_fc_2 = affine_forward(relu_1, W2, b2)  # (N, C)
        import copy

        scores = copy.deepcopy(fc_2)

        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss, d_scores = softmax_loss(scores, y)

        d_relu_1, d_W2, d_b2 = affine_backward(d_scores, cache_fc_2)
        d_fc_1 = relu_backward(d_relu_1, cache_relu_1)
        dx, d_W1, d_b1 = affine_backward(d_fc_1, cache_fc_1)

        grads["W1"] = d_W1
        grads["W2"] = d_W2
        grads["b1"] = d_b1
        grads["b2"] = d_b2

        loss += 0.5 * self.reg * \
            (np.sum(np.square(self.params["W1"])) +
             np.sum(np.square(self.params["W2"])))

        grads["W2"] += self.reg * self.params["W2"]
        grads["W1"] += self.reg * self.params["W1"]
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Beispiel #27
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        """
        if self.use_dropout:
            self.dropout_param['mode']=mode
       """
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        X_temp = X
        affine_Input = list()
        relu_input = list()
        batchnorm_input = list()
        dropout_input = list()
        score_tmp = None
        for i in range(self.num_layers - 1):
            tmp, affine_input_tmp = affine_forward(
                X_temp, self.params['W' + str(i + 1)],
                self.params['b' + str(i + 1)])
            if self.use_batchnorm:
                tmp, batchnorm_cache = batchnorm_forward(
                    tmp, self.params['gamma' + str(i + 1)],
                    self.params['beta' + str(i + 1)], self.bn_params[i])
                batchnorm_input.append(batchnorm_cache)
            score_tmp, relu_input_tmp = relu_forward(tmp)
            if self.use_dropout:
                score_tmp, dropout_cache = dropout_forward(
                    score_tmp, self.dropout_param)
                dropout_input.append(dropout_cache)
            affine_Input.append(affine_input_tmp)
            relu_input.append(relu_input_tmp)
            X_temp = score_tmp
        scores, last_input_tmp = affine_forward(
            score_tmp, self.params['W' + str(self.num_layers)],
            self.params['b' + str(self.num_layers)])
        affine_Input.append(last_input_tmp)
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        if mode == 'test':
            return scores
        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        num_trains = X.shape[0]
        loss, dscores = softmax_loss(scores, y)
        weight_decay_sum = 0
        for i in range(self.num_layers):
            tmp = np.sum(self.params['W' + str(i + 1)] *
                         self.params['W' + str(i + 1)])
            weight_decay_sum = weight_decay_sum + tmp

        loss = loss + 0.5 * self.reg * weight_decay_sum
        #softmax_output=np.exp(scores)/np.sum(np.exp(scores),axis=1).reshape(-1,1)
        #softmax_output[range(num_trains),list(y)]=softmax_output[range(num_trains),list(y)]-1
        dout = dscores
        for i in range(self.num_layers):
            dx, dw, db = affine_backward(dout, affine_Input[-(i + 1)])
            grads['W' +
                  str(self.num_layers - i)] = dw + self.reg * self.params[
                      'W' + str(self.num_layers - i)]
            grads['b' + str(self.num_layers - i)] = db
            if self.use_dropout and i != self.num_layers - 1:
                dx = dropout_backward(dx, dropout_input[-(i + 1)])
            if i != self.num_layers - 1:
                dout = relu_backward(dx, relu_input[-(i + 1)])
            if i != self.num_layers - 1 and self.use_batchnorm:
                dout, dgamma, dbeta = batchnorm_backward(
                    dout, batchnorm_input[-(i + 1)])
                grads['gamma' + str(self.num_layers - i - 1)] = dgamma
                grads['beta' + str(self.num_layers - i - 1)] = dbeta

        return loss, grads
Beispiel #28
0
def two_layer_net(X, model, y=None, reg=0.0):
  """
  Compute the loss and gradients for a two layer fully connected neural network.
  The net has an input dimension of D, a hidden layer dimension of H, and
  performs classification over C classes. We use a softmax loss function and L2
  regularization the the weight matrices. The two layer net should use a ReLU
  nonlinearity after the first affine layer.

  The two layer net has the following architecture:

  input - fully connected layer - ReLU - fully connected layer - softmax

  The outputs of the second fully-connected layer are the scores for each
  class.

  Inputs:
  - X: Input data of shape (N, D). Each X[i] is a training sample.
  - model: Dictionary mapping parameter names to arrays of parameter values.
    It should contain the following:
    - W1: First layer weights; has shape (D, H)
    - b1: First layer biases; has shape (H,)
    - W2: Second layer weights; has shape (H, C)
    - b2: Second layer biases; has shape (C,)
  - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
    an integer in the range 0 <= y[i] < C. This parameter is optional; if it
    is not passed then we only return scores, and if it is passed then we
    instead return the loss and gradients.
  - reg: Regularization strength.

  Returns:
  If y not is passed, return a matrix scores of shape (N, C) where scores[i, c]
  is the score for class c on input X[i].

  If y is not passed, instead return a tuple of:
  - loss: Loss (data loss and regularization loss) for this batch of training
    samples.
  - grads: Dictionary mapping parameter names to gradients of those parameters
    with respect to the loss function. This should have the same keys as model.
  """

  # unpack variables from the model dictionary
  W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
  N, D = X.shape

  # compute the forward pass
  scores = None
  #############################################################################
  # TODO: Perform the forward pass, computing the class scores for the input. #
  # Store the result in the scores variable, which should be an array of      #
  # shape (N, C).                                                             #
  #############################################################################
  # relu = lambda x: np.maximum(x,0)
  # H, C = W2.shape
  # scores = np.zeros((N,C))
  # layer1 = np.maximum(np.dot(X,W1) + b1,0)
  # scores = np.dot(layer1,W2) + b2
  ## above is the test implementation
  ## NOW, using cs231n/layers.py
  ## NOTICE define layer0 = X
  # then behaviour is 'functional' layer(n+1) = f(layer(n) | parameters)
  from cs231n.layers import affine_forward, relu_forward, softmax_loss
  from cs231n.layers import affine_backward, relu_backward

  layer1, cache1 = affine_forward(X, W1, b1)
  layer2, cache2 = relu_forward(layer1)
  layer3, cache3 = affine_forward(layer2, W2, b2)

  scores = layer3
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################
  
  # If the targets are not given then jump out, we're done
  if y is None:
    return scores

  # compute the loss
  loss = None
  #############################################################################
  # TODO: Finish the forward pass, and compute the loss. This should include  #
  # both the data loss and L2 regularization for W1 and W2. Store the result  #
  # in the variable loss, which should be a scalar. Use the Softmax           #
  # classifier loss. So that your results match ours, multiply the            #
  # regularization loss by 0.5                                                #
  #############################################################################
  # rows   = np.sum(np.exp(scores), axis=1)
  # layer4 = np.mean(-layer3[range(N), y] + np.log(rows))
  # loss   = layer4 + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
  # 
  loss, dx = softmax_loss(scores, y)
  loss += 0.5 * reg * np.sum(W1*W1) + 0.5 * reg * np.sum(W2 * W2)
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  # compute the gradients
  grads = {}
  #############################################################################
  # TODO: Compute the backward pass, computing the derivatives of the weights #
  # and biases. Store the results in the grads dictionary. For example,       #
  # grads['W1'] should store the gradient on W1, and be a matrix of same size #
  #############################################################################
  dlayer2, grads['W2'], grads['b2'] = affine_backward(dx, cache3)
  dlayer1                           = relu_backward(dlayer2, cache2)
  dLayer0, grads['W1'], grads['b1'] = affine_backward(dlayer1, cache1)

  #gradients need to have regularization term
  grads['W2'] += reg * W2
  grads['W1'] += reg * W1
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  return loss, grads