def validate_gradient():
    """
    Function to validate the implementation of gradient computation.
    Should be used together with gradient_check.py.
    This is a useful thing to do when you implement your own gradient
    calculation methods.
    It is not required for this assignment.
    """
    from gradient_check import eval_numerical_gradient, rel_error
    # randomly initialize W
    dim = 4
    num_classes = 4
    num_inputs = 5
    params = {}
    std = 0.001
    params['W1'] = std * np.random.randn(dim, 10)
    params['b1'] = np.zeros(10)
    params['W2'] = std * np.random.randn(10, num_classes)
    params['b2'] = np.zeros(num_classes)

    X = np.random.randn(num_inputs, dim)
    y = np.array([0, 1, 2, 2, 1])

    loss, grads = compute_neural_net_loss(params, X, y, reg=0.1)
    # these should all be less than 1e-8 or so
    for param_name in params:
      f = lambda W: compute_neural_net_loss(params, X, y, reg=0.1)[0]
      param_grad_num = eval_numerical_gradient(f, params[param_name], verbose=False)
      print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
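# For reference, here is a minimal sketch of the two helpers imported from
# gradient_check.py above. It mirrors the standard cs231n-style utilities; the actual
# file in your assignment may differ slightly.
import numpy as np


def rel_error(x, y):
    """Maximum relative error between two arrays."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


def eval_numerical_gradient(f, x, verbose=True, h=1e-5):
    """Centered-difference numerical gradient of a scalar function f at x."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h              # evaluate f(x + h)
        fxph = f(x)
        x[ix] = oldval - h              # evaluate f(x - h)
        fxmh = f(x)
        x[ix] = oldval                  # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad

# Because the parameter array is perturbed in place, the `lambda W: ...` closures used
# throughout these examples can ignore their argument and still see the perturbation.
# gradient_check.py also provides eval_numerical_gradient_array for functions that
# return arrays; it is used in some of the later examples.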
Example #2
def check_f(X,Y,net):
    
    loss, grads = net.loss_f(X, Y)
    
    for param_name in grads:
        f = lambda W: net.loss_f(X, Y)[0]

        param_grad_num = eval_numerical_gradient(f, net.param[param_name], verbose=False)
        print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
Example #3
    def test_backprop(self):
        dummy_scores = np.random.rand(5, 5)
        dummy_labels = np.array([0, 1, 2, 3, 4])

        expected_grad = eval_numerical_gradient(
            lambda x: avg_loss(self.layer.forward(x), dummy_labels),
            dummy_scores)

        grad = self.layer.backprop(dummy_labels)

        np.testing.assert_almost_equal(expected_grad, grad, decimal=5)
Example #4
    def test_temporal_softmax_loss(self):
        N, T, V = 7, 8, 9
        score = np.random.randn(N, T, V)
        y = np.random.randint(V, size=(N, T))
        mask = (np.random.rand(N, T) > 0.5)

        _, grad_score = temporal_softmax_loss(score, y, mask, verbose=False)
        grad_score_num = eval_numerical_gradient(
            lambda x: temporal_softmax_loss(x, y, mask)[0],
            score,
            verbose=False)

        self.assertAlmostEqual(rel_error(grad_score, grad_score_num),
                               1e-9,
                               places=2)
  def test_softmax_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5
    
    for test_num in range(10):
      N = np.random.choice(range(1, 100))
      C = np.random.choice(range(1, 20))
      X = np.random.randn(N, C)
      y = np.random.randint(C, size=(N,))

      loss, grads = SoftMaxLoss(X, y)

      f = lambda _: SoftMaxLoss(X, y)[0]
      grads_num = eval_numerical_gradient(f, X, verbose = False, h = 1e-5)
      self.assertLess(rel_error(grads_num, grads), rel_error_max)
Example #6
    def test_backprop(self):
        x = np.random.randn(self.N, self.D)
        y = np.random.randn(*x.shape)

        # Numerical gradient w.r.t inputs
        num_grad_x = eval_numerical_gradient(
            f=lambda x: categorical_cross_entropy(self.layer.forward_prop(x), y),
            x=x,
            verbose=False)

        # Compute gradients using backprop algorithm
        grad_x = self.layer.backprop(y)

        np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
Example #8
def softmax_loss_test():
    np.random.seed(231)
    num_classes, num_inputs = 10, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                     x,
                                     verbose=False)
    loss, dx = softmax_loss(x, y)

    # Test softmax_loss function. Loss should be close to 2.3 (about ln(10) for near-uniform scores over 10 classes) and dx error should be around e-8
    print('\nTesting softmax_loss:')
    print('loss: ', loss)
    print('dx error: ', rel_error(dx_num, dx))
Example #9
  def test_crossentropy_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
      N = np.random.choice(range(1, 100))
      C = np.random.choice(range(1, 10))
      X = np.random.randn(N, C)
      y = np.random.randint(C, size=(N,))
      y = dense_to_one_hot(y, C)
      X = np.exp(X - np.max(X, axis = 1, keepdims = True))
      X /= np.sum(X, axis = 1, keepdims = True)
      loss = CrossEntropyModule().forward(X, y)
      grads = CrossEntropyModule().backward(X, y)
      f = lambda _: CrossEntropyModule().forward(X, y)
      grads_num = eval_numerical_gradient(f, X, verbose = False, h = 1e-5)
      self.assertLess(rel_error(grads_num, grads), rel_error_max)
  def test_crossentropy_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5
    
    for test_num in range(10):
      N = np.random.choice(range(1, 100))
      C = np.random.choice(range(1, 10))
      X = np.random.randn(N, C)
      y = np.random.randint(C, size=(N,))
      X = np.exp(X - np.max(X, axis = 1, keepdims = True))
      X /= np.sum(X, axis = 1, keepdims = True)

      loss, grads = CrossEntropyLoss(X, y)

      f = lambda _: CrossEntropyLoss(X, y)[0]
      grads_num = eval_numerical_gradient(f, X, verbose = False, h = 1e-5)
      self.assertLess(rel_error(grads_num, grads), rel_error_max)
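# Both tests above pass softmax probabilities (not raw scores) into the cross-entropy
# loss, so the module only has to handle the cross-entropy term itself. Below is a
# minimal sketch of such a module, following the forward/backward interface of the
# first test (one-hot labels); it is an illustration, not the assignment's actual code.
import numpy as np


class CrossEntropyModule:
    """Cross-entropy on probabilities; X has shape (N, C), y is one-hot of shape (N, C)."""

    def forward(self, x, y):
        # Average negative log-likelihood of the true class. The inputs come from a
        # softmax, so every entry of x is strictly positive.
        return -np.sum(y * np.log(x)) / x.shape[0]

    def backward(self, x, y):
        # d/dx of -(1/N) * sum(y * log(x)) is -y / (N * x), which is what the
        # numerical gradient check above should reproduce.
        return -y / (x.shape[0] * x)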
Example #11
    def gradient_check(self, X, y):
        """Runs gradient check on every parameter of the model

        Args:
            X: Input data in matrix form, of any shape
            y: Vector of labels
        """
        print "Running numeric gradient check with reg = %s" % self.reg

        loss, grads = self.loss(X, y)
        for param_key in sorted(self.params):
            f = lambda _: self.loss(X, y)[0]
            num_grad = eval_numerical_gradient(f,
                                               self.params[param_key],
                                               verbose=False)
            print "%s relative error: %.2e" % (
                param_key, rel_error(num_grad, grads[param_key]))
Example #12
# Implement the rest of the function. This will compute the gradient of the
# loss with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you
# have a correctly implemented forward pass, you can debug your backward pass
# using a numeric gradient check:

# Use numeric gradient checking to check your implementation of the backward
# pass.  If your implementation is correct, the difference between the numeric
# and analytic gradients should be less than 1e-8 for each of W1, W2, b1, and
# b2.
loss, grads = net.loss(X, y, reg=0.05)

# these should all be less than 1e-8 or so
for param_name in grads:
    f = lambda W: net.loss(X, y, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f,
                                             net.params[param_name],
                                             verbose=False)
    print('%s max relative error: %e' %
          (param_name, rel_error(param_grad_num, grads[param_name])))

#======================================================================================
# Q3: Train the network using gradient descent
#======================================================================================

# To train the network we will use stochastic gradient
# descent (SGD). Look at the function `TwoLayerNet.train` and fill in the
# missing sections to implement the training procedure.  You will also have to
# implement `TwoLayerNet.predict`, as the training process periodically
# performs prediction to keep track of accuracy over time while the network
# trains.
# Once you have implemented the method, run the code below to train a two-layer
# network on toy data (sketched next).
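# A minimal sketch of that training call on the toy data. It mirrors the two-layer-net
# snippet that appears later in this collection and assumes TwoLayerNet.train returns
# a stats dict with a 'loss_history' list and that TwoLayerNet.predict is implemented.
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=5e-6,
                  num_iters=100, batch_size=5, verbose=False)

print('Final training loss: ', stats['loss_history'][-1])   # should drop below ~0.2
print('Toy-data accuracy: ', (net.predict(X) == y).mean())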
Example #13
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

# Relative error should be around e-10 or less
print('Testing affine_relu_forward and affine_relu_backward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))


#Loss test
np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 (about C - 1, since tiny random scores leave nearly every margin violated by ~1) and dx error should be around the order of e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be close to 2.3 and dx error should be around e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
###################################################################################
#   Loss layers: Softmax and SVM                                                  #
###################################################################################
#   You implemented these loss functions in the last assignment, so we'll         #
#   give them to you for free here. You should still make sure you                #
#   understand how they work by looking at the implementations in                 #
#   layers.py.  You can make sure that the implementations are                    #
#   correct by running the following.                                             #
###################################################################################
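# For reference, a typical vectorized softmax_loss of the kind provided in layers.py
# looks roughly like the sketch below. It follows the standard formulation and is not
# necessarily the exact code in layers.py.
import numpy as np


def softmax_loss(x, y):
    """x: scores of shape (N, C); y: integer labels of shape (N,). Returns (loss, dx)."""
    N = x.shape[0]
    shifted = x - np.max(x, axis=1, keepdims=True)    # shift for numerical stability
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    probs = np.exp(log_probs)
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1                          # dL/dscores = probs - one_hot(y)
    dx /= N
    return loss, dx

# With tiny random scores the probabilities are nearly uniform, so for 10 classes the
# loss is about ln(10), i.e. roughly 2.3, which is the sanity check used below.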

num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: layers.svm_loss(x, y)[0], x, verbose=False)
loss, dx = layers.svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9

print('Testing svm_loss:')
print('loss (should be around 9): ', loss)
print('dx error (should be around 1e-9): ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: layers.softmax_loss(x, y)[0], x, verbose=False)
loss, dx = layers.softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8

print('\nTesting softmax_loss:')
print('loss (should be around 2.3): ', loss)
print('dx error (should be around 1e-8): ', rel_error(dx_num, dx))
Example #15
num_inputs = 2
input_dim = (3, 16, 16)
reg = 0.0
num_classes = 10
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = cnn.ThreeLayerConvNet(num_filters=3, filter_size=3,
                          input_dim=input_dim, hidden_dim=7,
                          dtype=np.float64)

if model.params != {}:
  loss, grads = model.loss(X, y)
  for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print('%s max relative error: %e' % (param_name, e))

###################################################################################
# Overfit small data                                                              #
#                                                                                 #
# A nice trick is to train your model with just a few training                    #
# samples. You should be able to overfit small datasets, which will               #
# result in very high training accuracy and comparatively low validation          #
# accuracy.                                                                       #
###################################################################################
'''
num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
Example #16
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))
#######################################################################################

#######################################################################################
# Test the softmax_loss function
######################################################################################
from layers import softmax_loss

np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                 x,
                                 verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
#######################################################################################
reg = 0.0
num_classes = 10
np.random.seed(231)
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = ThreeLayerConvNet(num_filters=3,
                          filter_size=3,
                          input_dim=input_dim,
                          hidden_dim=7,
                          dtype=np.float64)
loss, grads = model.loss(X, y)
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f,
                                             model.params[param_name],
                                             verbose=False,
                                             h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print('%s max relative error: %e' % (param_name, e))

# ## Overfit small data
# A nice trick is to train your model with just a few training samples. You should be able to overfit small datasets, which will result in very high training accuracy and comparatively low validation accuracy.

np.random.seed(231)

num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
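    # A sketch of how this overfitting check usually continues (assumes the
    # cs231n-style Solver; the module path matches the `from solver import Solver`
    # used later in this collection):
    'y_val': data['y_val'],
}

from solver import Solver

model = ThreeLayerConvNet(weight_scale=1e-2)
solver = Solver(model, small_data,
                num_epochs=15, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True, print_every=1)
solver.train()
# Training accuracy should approach 100% while validation accuracy stays much lower,
# which is the overfitting signature described above.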
# Implement the rest of the function. This will compute the gradient of the loss
# with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you (hopefully!)
# have a correctly implemented forward pass, you can debug your backward pass using
# a numeric gradient check:

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.
from gradient_check import eval_numerical_gradient, rel_error

loss, grads = net.loss_other(X, y, reg=0.1)

# these should all be less than 1e-8 or so
for param_name in grads:
  f = lambda W: net.loss_other(X, y, reg=0.1)[0]
  param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
  print ('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

# Train the network
# To train the network we will use stochastic gradient descent (SGD), similar to the SVM
# and Softmax classifiers. Look at the function TwoLayerNet.train and fill in the missing
# sections to implement the training procedure. This should be very similar to the training
# procedure you used for the SVM and Softmax classifiers. You will also have to implement
# TwoLayerNet.predict, as the training process periodically performs prediction to keep track
# of accuracy over time while the network trains.
# Once you have implemented the method, run the code below to train a two-layer network on toy data.
#  You should achieve a training loss less than 0.2.
net = init_toy_model()
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=1e-5,
                  num_iters=100, batch_size=5, verbose=False)
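# To verify the "training loss less than 0.2" target mentioned above, it helps to plot
# the loss curve. This sketch assumes matplotlib is available and that train() returns
# a stats dict with the standard 'loss_history' key.
import matplotlib.pyplot as plt

plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.title('Training loss history')
plt.show()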
from solver import Solver
import time
from relativeError import rel_meanError
import numpy as np

print('\n--------- square_loss & crossEntropy_loss test --------- ')

np.random.seed(231)
num_classes, num_inputs = 1, 50
x = np.random.randn(num_inputs, num_classes)
y = np.random.randn(num_inputs, num_classes)

print('\nx: ', x)
print('\ny: ', y)
dx_num = eval_numerical_gradient(lambda x: square_loss(x, y)[0],
                                 x,
                                 verbose=False)
loss, dx = square_loss(x, y)
#print('\ndx_num: ', dx_num)
#print('\ndx: ', dx)
print('\nnp.c_[dx_num, dx]: ', np.c_[dx_num, dx])
# Test square_loss function.
print('Testing square_loss:')
print('loss: ', loss)
print('dx error: ', rel_meanError(dx_num, dx))

#------------------------------------------------------------------------------
#%%
print('\n--------- affine_sigmoid_affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 10)
Example #20
  print(temporal_softmax_loss(x, y, mask)[0])
  
check_loss(100, 1, 10, 1.0)   # Should be about 2.3: one time step, so ~ln(10) per example
check_loss(100, 10, 10, 1.0)  # Should be about 23: the loss sums over T=10 unmasked steps
check_loss(5000, 10, 10, 0.1) # Should be about 2.3: only ~10% of the steps are unmasked

# Gradient check for temporal softmax loss
N, T, V = 7, 8, 9

x = np.random.randn(N, T, V)
y = np.random.randint(V, size=(N, T))
mask = (np.random.rand(N, T) > 0.5)

loss, dx = temporal_softmax_loss(x, y, mask, verbose=False)

dx_num = eval_numerical_gradient(lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)

print('dx error: ', rel_error(dx, dx_num))

# Check the loss function of the captioning model (a sketch of the check follows the setup below)
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
          input_dim=D,
          wordvec_dim=W,
          hidden_dim=H,
          cell_type='rnn',
          dtype=np.float64)
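# A sketch of how this loss check typically continues for the captioning model,
# assuming the cs231n-style CaptioningRNN API in which model.loss(features, captions)
# returns (loss, grads); the shapes simply match the setup above.
features = np.random.randn(N, D)
captions = np.random.randint(V, size=(N, T))

loss, grads = model.loss(features, captions)
print('loss: ', loss)
# Each entry of model.params could then be gradient-checked with eval_numerical_gradient,
# exactly as in the earlier examples.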
Example #21
print('Difference between your scores and correct scores:',
      np.sum(scores - correct_scores))

loss, _ = ver_neural.loss(ver_X_data, ver_y_data, reg=5e-2)
correct_loss = 1.30378789133
# Use the small dataset to verify the forward pass: the difference here is tiny, so the forward pass is correct.
print('Difference between your loss and correct loss:',
      np.sum(np.abs(loss - correct_loss)))

from gradient_check import eval_numerical_gradient, rel_error

loss, gradient = ver_neural.loss(ver_X_data, ver_y_data, reg=5e-2)
for param_name in gradient:
    f = lambda W: ver_neural.loss(ver_X_data, ver_y_data, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f,
                                             ver_neural.params[param_name],
                                             verbose=False)
    print('%s max relative error: %e' %
          (param_name, rel_error(param_grad_num, gradient[param_name])))

ver_states = ver_neural.train(ver_X_data,
                              ver_y_data,
                              ver_X_data,
                              ver_y_data,
                              learning_rate=1e-1,
                              reg=5e-6,
                              num_iters=100,
                              verbose=False)
# On this tiny dataset the loss is almost 0 and the accuracy is 100%, so the model code can be considered correct.
print('Final train loss: ', ver_states['loss_histroy'][-1])
dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_maxError(dx, dx_num))
#------------------------------------------------------------------------------
#%%
print('\n--------- square_loss & crossEntropy_loss test --------- ')

np.random.seed(231)
num_classes, num_inputs = 1, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: square_loss(x, y)[0], x, verbose=False)
loss, dx = square_loss(x, y)

# Test square_loss function. 
print('Testing square_loss:')
print('loss: ', loss)
print('dx error: ', rel_maxError(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: crossEntropy_loss(x, y)[0], x, verbose=False)
loss, dx = crossEntropy_loss(x, y)

# Test crossEntropy_loss function. 
print('\n!!! Caution: this crossEntropy_loss test might be wrong:')
print('Testing crossEntropy_loss:')
print('loss: ', loss)
print('dx error: ', rel_maxError(dx_num, dx))