def validate_gradient():
    """
    Validate the implementation of the gradient computation. Should be used
    together with gradient_check.py. This is a useful thing to do when you
    implement your own gradient calculation methods. It is not required for
    this assignment.
    """
    from gradient_check import eval_numerical_gradient, rel_error

    # Randomly initialize W for a small two-layer network.
    dim = 4
    num_classes = 4
    num_inputs = 5
    std = 0.001
    params = {}
    params['W1'] = std * np.random.randn(dim, 10)
    params['b1'] = np.zeros(10)
    params['W2'] = std * np.random.randn(10, num_classes)
    params['b2'] = np.zeros(num_classes)

    X = np.random.randn(num_inputs, dim)
    y = np.array([0, 1, 2, 2, 1])

    loss, grads = compute_neural_net_loss(params, X, y, reg=0.1)

    # These should all be less than 1e-8 or so.
    for param_name in params:
        f = lambda W: compute_neural_net_loss(params, X, y, reg=0.1)[0]
        param_grad_num = eval_numerical_gradient(f, params[param_name], verbose=False)
        print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
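# Every snippet below leans on eval_numerical_gradient and rel_error from
# gradient_check.py. For reference, a centered finite-difference gradient check
# typically looks like the sketch below; this is an illustrative version under
# that assumption, not necessarily the exact code shipped in gradient_check.py.
import numpy as np

def eval_numerical_gradient_sketch(f, x, verbose=True, h=1e-5):
    # Estimate the gradient of f at x element by element with centered differences.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                # f(x - h)
        x[ix] = oldval             # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad

def rel_error_sketch(x, y):
    # Maximum relative error, guarded against division by zero.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))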
def check_f(X, Y, net):
    loss, grads = net.loss_f(X, Y)
    for param_name in grads:
        f = lambda W: net.loss_f(X, Y)[0]
        param_grad_num = eval_numerical_gradient(f, net.param[param_name], verbose=False)
        print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
def test_backprop(self):
    dummy_scores = np.random.rand(5, 5)
    dummy_labels = np.array([0, 1, 2, 3, 4])
    expected_grad = eval_numerical_gradient(
        lambda x: avg_loss(self.layer.forward(x), dummy_labels), dummy_scores)
    grad = self.layer.backprop(dummy_labels)
    np.testing.assert_almost_equal(expected_grad, grad, decimal=5)
def test_temporal_softmax_loss(self):
    N, T, V = 7, 8, 9
    score = np.random.randn(N, T, V)
    y = np.random.randint(V, size=(N, T))
    mask = (np.random.rand(N, T) > 0.5)
    _, grad_score = temporal_softmax_loss(score, y, mask, verbose=False)
    grad_score_num = eval_numerical_gradient(
        lambda x: temporal_softmax_loss(x, y, mask)[0], score, verbose=False)
    self.assertAlmostEqual(rel_error(grad_score, grad_score_num), 1e-9, places=2)
def test_softmax_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5
    for test_num in range(10):
        N = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 20))
        X = np.random.randn(N, C)
        y = np.random.randint(C, size=(N,))
        loss, grads = SoftMaxLoss(X, y)
        f = lambda _: SoftMaxLoss(X, y)[0]
        grads_num = eval_numerical_gradient(f, X, verbose=False, h=1e-5)
        self.assertLess(rel_error(grads_num, grads), rel_error_max)
def test_backprop(self):
    x = np.random.randn(self.N, self.D)
    y = np.random.randn(*x.shape)

    # Numerical gradient w.r.t. inputs
    num_grad_x = eval_numerical_gradient(
        f=lambda x: categorical_cross_entropy(self.layer.forward_prop(x), y),
        x=x, verbose=False)

    # Gradient computed by the backprop algorithm
    grad_x = self.layer.backprop(y)

    np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
def softmax_loss_test():
    np.random.seed(231)
    num_classes, num_inputs = 10, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
    loss, dx = softmax_loss(x, y)

    # Test the softmax_loss function. With near-zero scores the loss should be
    # close to log(10) ~ 2.3, and the dx error should be around 1e-8.
    print('\nTesting softmax_loss:')
    print('loss: ', loss)
    print('dx error: ', rel_error(dx_num, dx))
def test_crossentropy_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5
    for test_num in range(10):
        N = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 10))
        X = np.random.randn(N, C)
        y = np.random.randint(C, size=(N,))
        y = dense_to_one_hot(y, C)
        # Turn the raw scores into a valid probability distribution (softmax).
        X = np.exp(X - np.max(X, axis=1, keepdims=True))
        X /= np.sum(X, axis=1, keepdims=True)
        loss = CrossEntropyModule().forward(X, y)
        grads = CrossEntropyModule().backward(X, y)
        f = lambda _: CrossEntropyModule().forward(X, y)
        grads_num = eval_numerical_gradient(f, X, verbose=False, h=1e-5)
        self.assertLess(rel_error(grads_num, grads), rel_error_max)
def test_crossentropy_loss(self):
    np.random.seed(42)
    rel_error_max = 1e-5
    for test_num in range(10):
        N = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 10))
        X = np.random.randn(N, C)
        y = np.random.randint(C, size=(N,))
        X = np.exp(X - np.max(X, axis=1, keepdims=True))
        X /= np.sum(X, axis=1, keepdims=True)
        loss, grads = CrossEntropyLoss(X, y)
        f = lambda _: CrossEntropyLoss(X, y)[0]
        grads_num = eval_numerical_gradient(f, X, verbose=False, h=1e-5)
        self.assertLess(rel_error(grads_num, grads), rel_error_max)
def gradient_check(self, X, y):
    """Runs a numeric gradient check on every parameter of the model.

    Args:
        X: Input data in matrix form, of any shape
        y: Vector of labels
    """
    print("Running numeric gradient check with reg = %s" % self.reg)

    loss, grads = self.loss(X, y)

    for param_key in sorted(self.params):
        f = lambda _: self.loss(X, y)[0]
        num_grad = eval_numerical_gradient(f, self.params[param_key], verbose=False)
        print("%s relative error: %.2e" % (param_key, rel_error(num_grad, grads[param_key])))
# Implement the rest of the function. This will compute the gradient of the
# loss with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you
# have a correctly implemented forward pass, you can debug your backward pass
# using a numeric gradient check.

# Use numeric gradient checking to check your implementation of the backward
# pass. If your implementation is correct, the difference between the numeric
# and analytic gradients should be less than 1e-8 for each of W1, W2, b1, and
# b2.
loss, grads = net.loss(X, y, reg=0.05)

# These should all be less than 1e-8 or so.
for param_name in grads:
    f = lambda W: net.loss(X, y, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

#======================================================================================
# Q3: Train the network using gradient descent
#======================================================================================
# To train the network we will use stochastic gradient descent (SGD). Look at
# the function `TwoLayerNet.train` and fill in the missing sections to
# implement the training procedure. You will also have to implement
# `TwoLayerNet.predict`, as the training process periodically performs
# prediction to keep track of accuracy over time while the network trains.
# (A hedged sketch of such a predict method follows this snippet.)
# Once you have implemented the method, run the code below to train a
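# Hedged sketch of what TwoLayerNet.predict usually reduces to for an
# affine -> ReLU -> affine network. The parameter names ('W1', 'b1', 'W2', 'b2')
# are assumptions carried over from the snippets above; the real method must
# match the class's own forward pass.
import numpy as np

def predict_sketch(params, X):
    # Forward pass, then pick the highest-scoring class for each input.
    hidden = np.maximum(0, X.dot(params['W1']) + params['b1'])   # ReLU
    scores = hidden.dot(params['W2']) + params['b2']
    return np.argmax(scores, axis=1)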
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

# Relative errors should be around 1e-10 or less.
print('Testing affine_relu_forward and affine_relu_backward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

# Loss test
np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be on the order of 1e-9.
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be close to 2.3 and dx error should be around 1e-8.
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
###################################################################################
# Loss layers: Softmax and SVM
###################################################################################
# You implemented these loss functions in the last assignment, so we'll give
# them to you for free here. You should still make sure you understand how they
# work by looking at the implementations in layers.py. You can make sure that
# the implementations are correct by running the following.
###################################################################################
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: layers.svm_loss(x, y)[0], x, verbose=False)
loss, dx = layers.svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error around 1e-9.
print('Testing svm_loss:')
print('loss (should be around 9): ', loss)
print('dx error (should be around 1e-9): ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: layers.softmax_loss(x, y)[0], x, verbose=False)
loss, dx = layers.softmax_loss(x, y)

# Test softmax_loss function. Loss should be around 2.3 and dx error around 1e-8.
print('\nTesting softmax_loss:')
print('loss (should be around 2.3): ', loss)
print('dx error (should be around 1e-8): ', rel_error(dx_num, dx))
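# For reference, a typical vectorized softmax loss with its gradient looks
# roughly like the sketch below. This is an illustrative re-implementation,
# not necessarily identical to the version in layers.py.
import numpy as np

def softmax_loss_sketch(x, y):
    # x: (N, C) class scores, y: (N,) integer labels in [0, C).
    shifted = x - np.max(x, axis=1, keepdims=True)      # subtract max for numerical stability
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    probs = np.exp(log_probs)
    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N      # mean cross-entropy
    dx = probs.copy()
    dx[np.arange(N), y] -= 1                            # d(loss)/d(scores)
    dx /= N
    return loss, dx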
num_inputs = 2
input_dim = (3, 16, 16)
reg = 0.0
num_classes = 10
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = cnn.ThreeLayerConvNet(num_filters=3, filter_size=3,
                              input_dim=input_dim, hidden_dim=7,
                              dtype=np.float64)

if model.params != {}:
    loss, grads = model.loss(X, y)
    for param_name in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = eval_numerical_gradient(f, model.params[param_name],
                                                 verbose=False, h=1e-6)
        e = rel_error(param_grad_num, grads[param_name])
        print('%s max relative error: %e' % (param_name, e))

###################################################################################
# Overfit small data
#
# A nice trick is to train your model with just a few training samples. You
# should be able to overfit small datasets, which will result in very high
# training accuracy and comparatively low validation accuracy.
###################################################################################
'''
num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

#######################################################################################

#######################################################################################
# Test the softmax_loss function
######################################################################################
from layers import softmax_loss

np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be around 2.3 and dx error around 1e-8.
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

#######################################################################################
reg = 0.0
num_classes = 10
np.random.seed(231)
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = ThreeLayerConvNet(num_filters=3, filter_size=3,
                          input_dim=input_dim, hidden_dim=7,
                          dtype=np.float64)

loss, grads = model.loss(X, y)
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name],
                                             verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print('%s max relative error: %e' % (param_name, e))

# ## Overfit small data
# A nice trick is to train your model with just a few training samples. You
# should be able to overfit small datasets, which will result in very high
# training accuracy and comparatively low validation accuracy.
np.random.seed(231)
num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
# Implement the rest of the function. This will compute the gradient of the loss
# with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you (hopefully!)
# have a correctly implemented forward pass, you can debug your backward pass using
# a numeric gradient check.

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.
from gradient_check import eval_numerical_gradient

loss, grads = net.loss_other(X, y, reg=0.1)

# These should all be less than 1e-8 or so.
for param_name in grads:
    f = lambda W: net.loss(X, y, reg=0.1)[0]
    param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

# Train the network
# To train the network we will use stochastic gradient descent (SGD), similar to the
# SVM and Softmax classifiers. Look at the function TwoLayerNet.train and fill in the
# missing sections to implement the training procedure. This should be very similar
# to the training procedure you used for the SVM and Softmax classifiers. You will
# also have to implement TwoLayerNet.predict, as the training process periodically
# performs prediction to keep track of accuracy over time while the network trains.
# A minimal sketch of such an SGD loop appears after the training call below.

# Once you have implemented the method, run the code below to train a two-layer
# network on toy data. You should achieve a training loss less than 0.2.
net = init_toy_model()
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=1e-5,
                  num_iters=100, batch_size=5, verbose=False)
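# For orientation, a hedged sketch of the minibatch SGD loop that a method like
# TwoLayerNet.train typically implements. The function name and hyperparameter
# defaults here are illustrative assumptions, not the assignment's API.
import numpy as np

def sgd_train_sketch(net, X, y, learning_rate=1e-1, reg=1e-5,
                     num_iters=100, batch_size=5):
    num_train = X.shape[0]
    loss_history = []
    for it in range(num_iters):
        # Sample a random minibatch of inputs and labels.
        batch_idx = np.random.choice(num_train, batch_size, replace=True)
        X_batch, y_batch = X[batch_idx], y[batch_idx]

        # Forward/backward pass: loss plus a gradient for every parameter.
        loss, grads = net.loss(X_batch, y_batch, reg=reg)
        loss_history.append(loss)

        # Vanilla SGD update: step each parameter against its gradient.
        for name in net.params:
            net.params[name] -= learning_rate * grads[name]
    return loss_history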
from solver import Solver
import time
from relativeError import rel_meanError
import numpy as np

print('\n--------- square_loss & crossEntropy_loss test --------- ')
np.random.seed(231)
num_classes, num_inputs = 1, 50
x = np.random.randn(num_inputs, num_classes)
y = np.random.randn(num_inputs, num_classes)
print('\nx: ', x)
print('\ny: ', y)

dx_num = eval_numerical_gradient(lambda x: square_loss(x, y)[0], x, verbose=False)
loss, dx = square_loss(x, y)
# print('\ndx_num: ', dx_num)
# print('\ndx: ', dx)
print('\nnp.c_[dx_num, dx]: ', np.c_[dx_num, dx])

# Test square_loss function.
print('Testing square_loss:')
print('loss: ', loss)
print('dx error: ', rel_meanError(dx_num, dx))

#------------------------------------------------------------------------------
#%%
print('\n--------- affine_sigmoid_affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 10)
print(temporal_softmax_loss(x, y, mask)[0])

# With near-uniform scores, each unmasked timestep contributes about log(V) to
# the per-sample loss, so the expected value is roughly T * p * log(V).
check_loss(100, 1, 10, 1.0)    # Should be about 2.3
check_loss(100, 10, 10, 1.0)   # Should be about 23
check_loss(5000, 10, 10, 0.1)  # Should be about 2.3

# Gradient check for the temporal softmax loss
N, T, V = 7, 8, 9
x = np.random.randn(N, T, V)
y = np.random.randint(V, size=(N, T))
mask = (np.random.rand(N, T) > 0.5)
loss, dx = temporal_softmax_loss(x, y, mask, verbose=False)

dx_num = eval_numerical_gradient(lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)
print('dx error: ', rel_error(dx, dx_num))

# Check the loss function of the captioning model
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='rnn',
                      dtype=np.float64)
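# For context, the temporal softmax loss checked above is ordinarily just the
# usual softmax cross-entropy applied to scores flattened over the time axis,
# with a per-timestep mask. A hedged, illustrative re-implementation (not
# necessarily the assignment's exact code):
import numpy as np

def temporal_softmax_loss_sketch(x, y, mask):
    # x: (N, T, V) scores, y: (N, T) integer labels,
    # mask: (N, T) booleans selecting which timesteps contribute to the loss.
    N, T, V = x.shape
    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)

    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N

    dx_flat = probs.copy()
    dx_flat[np.arange(N * T), y_flat] -= 1
    dx_flat /= N
    dx_flat *= mask_flat[:, None]        # masked timesteps get zero gradient
    return loss, dx_flat.reshape(N, T, V)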
print('Difference between your scores and correct scores:', np.sum(scores - correct_scores))

loss, _ = ver_neural.loss(ver_X_data, ver_y_data, reg=5e-2)
correct_loss = 1.30378789133

# Verify the forward pass on a tiny dataset: the difference here is extremely
# small, so the forward pass can be considered correct.
print('Difference between your loss and correct loss:', np.sum(np.abs(loss - correct_loss)))

from gradient_check import eval_numerical_gradient

loss, gradient = ver_neural.loss(ver_X_data, ver_y_data, reg=5e-2)
for param_name in gradient:
    f = lambda W: ver_neural.loss(ver_X_data, ver_y_data, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f, ver_neural.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, gradient[param_name])))

ver_states = ver_neural.train(ver_X_data, ver_y_data, ver_X_data, ver_y_data,
                              learning_rate=1e-1, reg=5e-6,
                              num_iters=100, verbose=False)

# On this tiny dataset the loss should be nearly zero and the accuracy close to
# 100%, so the model code can be considered correct.
print('Final train loss: ', ver_states['loss_histroy'][-1])
dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_maxError(dx, dx_num))

#------------------------------------------------------------------------------
#%%
print('\n--------- square_loss & crossEntropy_loss test --------- ')
np.random.seed(231)
num_classes, num_inputs = 1, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: square_loss(x, y)[0], x, verbose=False)
loss, dx = square_loss(x, y)

# Test square_loss function.
print('Testing square_loss:')
print('loss: ', loss)
print('dx error: ', rel_maxError(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: crossEntropy_loss(x, y)[0], x, verbose=False)
loss, dx = crossEntropy_loss(x, y)

# Test crossEntropy_loss function.
print('\n!!!caution crossEntropy_loss test might be wrong:')
print('Testing crossEntropy_loss:')
print('loss: ', loss)
print('dx error: ', rel_maxError(dx_num, dx))