def Test_Affine_Backward():
    # Test the affine_backward function
    np.random.seed(123)
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    mp_x = NumpyVarToMinpy(x)
    mp_w = NumpyVarToMinpy(w)
    mp_b = NumpyVarToMinpy(b)
    mp_dout = NumpyVarToMinpy(dout)

    dx_num = eval_numerical_gradient_array(
        lambda x: MinpyVarToNumpy(affine_forward(NumpyVarToMinpy(x), mp_w, mp_b)[0]), x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: MinpyVarToNumpy(affine_forward(mp_x, NumpyVarToMinpy(w), mp_b)[0]), w, dout)
    db_num = eval_numerical_gradient_array(
        lambda b: MinpyVarToNumpy(affine_forward(mp_x, mp_w, NumpyVarToMinpy(b))[0]), b, dout)

    _, cache = affine_forward(mp_x, mp_w, mp_b)
    mp_dx, mp_dw, mp_db = affine_backward(mp_dout, cache)
    dx = MinpyVarToNumpy(mp_dx)
    dw = MinpyVarToNumpy(mp_dw)
    db = MinpyVarToNumpy(mp_db)

    # The error should be around 1e-10
    print 'Testing affine_backward function:'
    print 'dx: ', dx
    print 'dw: ', dw
    print 'db: ', db
    print 'dx error: ', rel_error(dx_num, dx)
    print 'dw error: ', rel_error(dw_num, dw)
    print 'db error: ', rel_error(db_num, db)
def test_affine_backward():
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

    _, cache = affine_forward(x, w, b)
    dx, dw, db = affine_backward(dout, cache)

    assert dx.shape == dx_num.shape
    assert dw.shape == dw_num.shape
    assert db.shape == db_num.shape
    assert rel_error(dx_num, dx) < 5e-7
    assert rel_error(dw_num, dw) < 5e-7
    assert rel_error(db_num, db) < 5e-7
def test_affine_backwards():
    # Test the affine_backward function
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

    _, cache = affine_forward(x, w, b)
    dx, dw, db = affine_backward(dout, cache)

    assert_close(dx, dx_num)
    assert_close(dw, dw_num)
    assert_close(db, db_num)

    # The error should be around 1e-10
    print 'Testing affine_backward function:'
    print db.shape, db_num.shape
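# For reference, a minimal sketch of what an affine backward pass could look like,
# assuming affine_forward caches (x, w, b) as the tests above imply. The name
# affine_backward_sketch is illustrative, not the implementation under test.
def affine_backward_sketch(dout, cache):
    x, w, b = cache
    N = x.shape[0]
    x_flat = x.reshape(N, -1)              # (N, D): undo the input reshape
    dx = dout.dot(w.T).reshape(x.shape)    # back through x_flat.dot(w) and the reshape
    dw = x_flat.T.dot(dout)                # (D, M)
    db = dout.sum(axis=0)                  # (M,): bias is broadcast over rows in the forward pass
    return dx, dw, db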
def test_relu_backwards():
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

    _, cache = relu_forward(x)
    dx = relu_backward(dout, cache)

    # The error should be around 1e-12
    print 'Testing relu_backward function:'
    assert_close(dx_num, dx)
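# A minimal sketch of the ReLU backward pass, assuming relu_forward caches its
# input x; illustrative only.
def relu_backward_sketch(dout, cache):
    x = cache
    return dout * (x > 0)   # gradient passes only where the input was positive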
def test_batch_norm_backwards():
    # Gradient check batchnorm backward pass
    N, D = 4, 5
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    dout = np.random.randn(N, D)

    bn_param = {'mode': 'train'}
    fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
    fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
    fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dout)
    da_num = eval_numerical_gradient_array(fg, gamma, dout)
    db_num = eval_numerical_gradient_array(fb, beta, dout)

    _, cache = batchnorm_forward(x, gamma, beta, bn_param)
    dx, dgamma, dbeta = batchnorm_backward(dout, cache)

    assert_close(da_num, dgamma)
    assert_close(db_num, dbeta)
    assert_close(dx_num, dx)
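# A minimal sketch of a batchnorm backward pass for reference. The cache layout
# (x_hat, gamma, inv_std) is an assumption; it depends entirely on what the
# corresponding batchnorm_forward chose to store.
def batchnorm_backward_sketch(dout, cache):
    x_hat, gamma, inv_std = cache
    N = dout.shape[0]
    dbeta = dout.sum(axis=0)
    dgamma = (dout * x_hat).sum(axis=0)
    dx_hat = dout * gamma
    # Simplified expression obtained by differentiating through the
    # per-feature mean and variance.
    dx = (inv_std / N) * (N * dx_hat
                          - dx_hat.sum(axis=0)
                          - x_hat * (dx_hat * x_hat).sum(axis=0))
    return dx, dgamma, dbeta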
def test_sandwich_layers(samples=None):
    # Draw the sample count at call time rather than at definition time
    if samples is None:
        samples = random.randrange(1, 10)
    for _ in range(samples):
        x = np.random.randn(2, 3, 4)
        w = np.random.randn(12, 10)
        b = np.random.randn(10)
        # Upstream gradient being backpropagated into this layer
        dout = np.random.randn(2, 10)

        out, cache = affine_relu_forward(x, w, b)
        dx, dw, db = affine_relu_backward(dout, cache)

        dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
        db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

        assert rel_error(dx_num, dx) < 5e-7
        assert rel_error(dw_num, dw) < 5e-7
        assert rel_error(db_num, db) < 5e-7
        assert dx.shape == dx_num.shape
        assert dw.shape == dw_num.shape
        assert db.shape == db_num.shape
        assert out.shape == dout.shape
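# A minimal sketch of the affine/ReLU sandwich being gradient-checked above,
# assuming affine_forward/affine_backward and relu_forward/relu_backward with
# the (out, cache) / gradient interfaces used in these tests. The _sketch names
# are illustrative.
def affine_relu_forward_sketch(x, w, b):
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)

def affine_relu_backward_sketch(dout, cache):
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    return affine_backward(da, fc_cache)  # (dx, dw, db)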
def Relu_Backward():
    np.random.seed(123)
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(
        lambda x: MinpyVarToNumpy(relu_forward(NumpyVarToMinpy(x))[0]), x, dout)

    mp_x = NumpyVarToMinpy(x)
    mp_dout = NumpyVarToMinpy(dout)
    _, cache = relu_forward(mp_x)
    mp_dx = relu_backward(mp_dout, cache)
    dx = MinpyVarToNumpy(mp_dx)

    # The error should be around 1e-12
    print 'Testing relu_backward function:'
    print 'dx', dx
    print 'dx_num', dx_num
    print 'dx error: ', rel_error(dx_num, dx)
print 'Running tests with p = ', p
print 'Mean of input: ', x.mean()
print 'Mean of train-time output: ', out.mean()
print 'Mean of test-time output: ', out_test.mean()
print 'Fraction of train-time output set to zero: ', (out == 0).mean()
print 'Fraction of test-time output set to zero: ', (out_test == 0).mean()
print

x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)
dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print 'dx relative error: ', rel_error(dx, dx_num)

# Train two identical nets, one with dropout and one without
num_train = 500
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

solvers = {}
dropout_choices = [0, 0.5]
for dropout in dropout_choices:
out, _ = word_embedding_forward(x, W)
expected_out = np.asarray([
    [[0.,         0.07142857, 0.14285714],
     [0.64285714, 0.71428571, 0.78571429],
     [0.21428571, 0.28571429, 0.35714286],
     [0.42857143, 0.5,        0.57142857]],
    [[0.42857143, 0.5,        0.57142857],
     [0.21428571, 0.28571429, 0.35714286],
     [0.,         0.07142857, 0.14285714],
     [0.64285714, 0.71428571, 0.78571429]]])

print('out error: ', rel_error(expected_out, out))

################################################################################
print("\nword backward\n")
################################################################################

np.random.seed(231)
N, T, V, D = 50, 3, 5, 6
x = np.random.randint(V, size=(N, T))
W = np.random.randn(V, D)

out, cache = word_embedding_forward(x, W)
dout = np.random.randn(*out.shape)
dW = word_embedding_backward(dout, cache)

f = lambda W: word_embedding_forward(x, W)[0]
dW_num = eval_numerical_gradient_array(f, W, dout)

print('dW error: ', rel_error(dW, dW_num))
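# A minimal sketch of the word-embedding backward pass, assuming the forward
# pass caches (x, W); np.add.at accumulates correctly when a word index repeats
# within a minibatch. Illustrative, not the implementation under test.
import numpy as np

def word_embedding_backward_sketch(dout, cache):
    x, W = cache
    dW = np.zeros_like(W)        # (V, D)
    np.add.at(dW, x, dout)       # scatter-add each (N, T, D) gradient row into its word's row
    return dW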
print 'difference: ', rel_error(out, correct_out)

# # Affine layer: backward
# Now implement the `affine_backward` function and test your implementation using numeric gradient checking.

# In[ ]:

# Test the affine_backward function
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around 1e-10
print 'Testing affine_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)

# # ReLU layer: forward
# Implement the forward pass for the ReLU activation function in the `relu_forward` function and test your implementation using the following:
import numpy as np
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x = np.random.randn(5, 6)
w = np.random.randn(6, 7)
dout = np.random.randn(5, 7)
bn_param = {'mode': 'train'}
gamma = np.random.randn(7)
beta = np.random.randn(7)

out, cache = aff_bn_relu_forward(x, w, gamma, beta, bn_param)
dx, dw, dgamma, dbeta = aff_bn_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: aff_bn_relu_forward(x, w, gamma, beta, bn_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: aff_bn_relu_forward(x, w, gamma, beta, bn_param)[0], w, dout)
dgamma_num = eval_numerical_gradient_array(lambda gamma: aff_bn_relu_forward(x, w, gamma, beta, bn_param)[0], gamma, dout)
dbeta_num = eval_numerical_gradient_array(lambda beta: aff_bn_relu_forward(x, w, gamma, beta, bn_param)[0], beta, dout)

print 'Testing aff_bn_relu:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'dgamma error: ', rel_error(dgamma_num, dgamma)
print 'dbeta error: ', rel_error(dbeta_num, dbeta)
print 'Fraction of train-time output set to zero: ', (out == 0).mean()
print 'Fraction of test-time output set to zero: ', (out_test == 0).mean()
print

# Dropout backward pass
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print 'dx relative error: ', rel_error(dx, dx_num)

# Fully-connected nets with Dropout
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for dropout in [0, 0.25, 0.5]:
    print 'Running check with dropout = ', dropout
    model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                              weight_scale=5e-2, dtype=np.float64,
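# A minimal sketch of the dropout backward pass for inverted dropout, assuming
# the forward pass caches (dropout_param, mask) with the 1/p scaling already
# folded into the mask. Illustrative only.
def dropout_backward_sketch(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask   # gradient flows only through the kept units
    return dout              # test mode is the identity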
correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                        [3.25553199, 3.5141327,  3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
print out
print '\nTesting affine_forward function:'
print 'difference: ', rel_error(out, correct_out)

# Test the affine_backward function
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be less than 1e-10
print '\nTesting affine_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)

# Test the relu_forward function
x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)
# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[6]:

x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-9
print 'Testing conv_backward_naive function'
print 'dx error: ', rel_error(dx, dx_num)
print 'dw error: ', rel_error(dw, dw_num)
print 'db error: ', rel_error(db, db_num)

# # Max pooling: Naive forward
def main():
    # The torchvision.transforms package provides tools for preprocessing data
    # and for performing data augmentation; here we set up a transform to
    # preprocess the data by subtracting the mean RGB value and dividing by the
    # standard deviation of each RGB value; we've hardcoded the mean and std.
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # We set up a Dataset object for each split (train / val / test); Datasets load
    # training examples one at a time, so we wrap each Dataset in a DataLoader which
    # iterates through the Dataset and forms minibatches. We divide the CIFAR-10
    # training set into train and val sets by passing a Sampler object to the
    # DataLoader telling how it should sample from the underlying Dataset.
    # cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
    #                              transform=transform)
    # loader_train = DataLoader(cifar10_train, batch_size=64,
    #                           sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

    cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                               transform=transform)
    loader_val = DataLoader(cifar10_val, batch_size=64,
                            sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

    # cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True,
    #                             transform=transform)
    # loader_test = DataLoader(cifar10_test, batch_size=64)

    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    # train()

    np.random.seed(231)
    N, C, H, W = 2, 6, 4, 5
    G = 2
    D = C // G
    x = 5 * np.random.randn(N, C, H, W) + 12
    gamma = np.random.randn(1, C, 1, 1)
    beta = np.random.randn(1, C, 1, 1)
    dout = np.random.randn(N, C, H, W)

    # x_reshaped = x.reshape((N*G, H*W*D)).T
    # fn = lambda n: fake_groupnorm_forward(x_reshaped)[0]
    # cache = {'gamma': gamma}
    # dmean = np.ones((1, N*G))
    # dn_num = eval_numerical_gradient_array(fn, x_reshaped, dmean)
    # mean, cache = fake_groupnorm_forward(x_reshaped)
    # dx_reshaped = fake_groupnorm_backward(dmean, cache)

    # You should expect errors of magnitudes between 1e-12 and 1e-07
    gn_param = {}
    fx = lambda x: spatial_groupnorm_forward(x, gamma, beta, G, gn_param)[0]
    fg = lambda a: spatial_groupnorm_forward(x, a, beta, G, gn_param)[0]
    fb = lambda b: spatial_groupnorm_forward(x, gamma, b, G, gn_param)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dout)
    da_num = eval_numerical_gradient_array(fg, gamma, dout)
    db_num = eval_numerical_gradient_array(fb, beta, dout)

    _, cache = spatial_groupnorm_forward(x, gamma, beta, G, gn_param)
    dx, dgamma, dbeta = spatial_groupnorm_backward(dout, cache)

    print('dx error: ', rel_error(dx_num, dx))
    print('dgamma error: ', rel_error(da_num, dgamma))
    print('dbeta error: ', rel_error(db_num, dbeta))
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[14]:

# Load the parameters from a matfile
dictMat = scipy.io.loadmat('../../../test/layers/conv_backward_cs231n.mat')
x = dictMat['x']
w = dictMat['w']
b = dictMat['b']
dout = dictMat['dout']
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

# Test the conv with the im2col version of forward/backward propagation
out, cache = conv_forward_im2col(x, w, b, conv_param)
dx, dw, db = conv_backward_im2col(dout, cache)

# Your errors should be around 1e-9
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))
out, _ = affine_forward(x, w, b)
correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                        [3.25553199, 3.5141327,  3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

# Test the relu_forward function
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_step_forward(x, h, Wx, Wh, b)

dnext_h = np.random.randn(*out.shape)

fx = lambda x: rnn_step_forward(x, h, Wx, Wh, b)[0]
fh = lambda prev_h: rnn_step_forward(x, prev_h, Wx, Wh, b)[0]
fWx = lambda Wx: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWh = lambda Wh: rnn_step_forward(x, h, Wx, Wh, b)[0]
fb = lambda b: rnn_step_forward(x, h, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dnext_h)
dprev_h_num = eval_numerical_gradient_array(fh, h, dnext_h)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dnext_h)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dnext_h)
db_num = eval_numerical_gradient_array(fb, b, dnext_h)

dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dprev_h error: ', rel_error(dprev_h_num, dprev_h))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

# # Vanilla RNN: forward
# Now that you have implemented the forward and backward passes for a single timestep of a vanilla RNN, you will combine these pieces to implement an RNN that processes an entire sequence of data.
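# A minimal sketch of a single-timestep RNN backward pass, assuming the forward
# pass uses a tanh nonlinearity and caches (x, prev_h, Wx, Wh, next_h); the
# cache layout is an assumption, and the _sketch name is illustrative.
def rnn_step_backward_sketch(dnext_h, cache):
    x, prev_h, Wx, Wh, next_h = cache
    da = dnext_h * (1 - next_h ** 2)   # back through tanh
    dx = da.dot(Wx.T)
    dprev_h = da.dot(Wh.T)
    dWx = x.T.dot(da)
    dWh = prev_h.T.dot(da)
    db = da.sum(axis=0)
    return dx, dprev_h, dWx, dWh, db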
print_mean_std(a_norm, axis=0)

# Gradient check batchnorm backward pass
np.random.seed(231)
N, D = 4, 5
x = 5 * np.random.randn(N, D) + 12
gamma = np.random.randn(D)
beta = np.random.randn(D)
dout = np.random.randn(N, D)

bn_param = {'mode': 'train'}
fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma.copy(), dout)
db_num = eval_numerical_gradient_array(fb, beta.copy(), dout)

_, cache = batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = batchnorm_backward(dout, cache)

# You should expect to see relative errors between 1e-13 and 1e-8
print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))

np.random.seed(231)
N, D = 100, 500
x = 5 * np.random.randn(N, D) + 12
gamma = np.random.randn(D)
beta = np.random.randn(D)
# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[ ]:

np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

# # Max pooling: Naive forward
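# For reference, a minimal sketch of a naive convolution backward pass, assuming
# the forward pass caches (x, w, b, conv_param) as the tests above imply; the
# _sketch name is illustrative and efficiency is deliberately ignored.
import numpy as np

def conv_backward_naive_sketch(dout, cache):
    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, H_out, W_out = dout.shape

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))   # bias gradient: sum over batch and spatial dims

    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    h0, w0 = i * stride, j * stride
                    window = x_pad[n, :, h0:h0 + HH, w0:w0 + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, h0:h0 + HH, w0:w0 + WW] += w[f] * dout[n, f, i, j]

    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]   # strip the zero padding
    return dx, dw, db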
Wh = np.random.randn(H, 4 * H)
b = np.random.randn(4 * H)

out, cache = lstm_forward(x, h0, Wx, Wh, b)

dout = np.random.randn(*out.shape)

dx, dh0, dWx, dWh, db = lstm_backward(dout, cache)

fx = lambda x: lstm_forward(x, h0, Wx, Wh, b)[0]
fh0 = lambda h0: lstm_forward(x, h0, Wx, Wh, b)[0]
fWx = lambda Wx: lstm_forward(x, h0, Wx, Wh, b)[0]
fWh = lambda Wh: lstm_forward(x, h0, Wx, Wh, b)[0]
fb = lambda b: lstm_forward(x, h0, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
db_num = eval_numerical_gradient_array(fb, b, dout)

print 'dx error: ', rel_error(dx_num, dx)
print 'dh0 error: ', rel_error(dh0_num, dh0)
print 'dWx error: ', rel_error(dWx_num, dWx)
print 'dWh error: ', rel_error(dWh_num, dWh)
print 'db error: ', rel_error(db_num, db)

# # LSTM captioning model
# Now that you have implemented an LSTM, update the implementation of the `loss` method of the `CaptioningRNN` class in the file `cs231n/classifiers/rnn.py` to handle the case where `self.cell_type` is `lstm`. This should require adding less than 10 lines of code.
#
# Once you have done so, run the following to check your implementation. You should see a difference of less than `1e-10`.
print '  Stds: ', out.std(axis=(0, 2, 3))

# Means should be close to beta and stds close to gamma
gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print 'After spatial batch normalization (nontrivial gamma, beta):'
print '  Shape: ', out.shape
print '  Means: ', out.mean(axis=(0, 2, 3))
print '  Stds: ', out.std(axis=(0, 2, 3))

# Spatial batch norm: backward pass
N, C, H, W = 2, 3, 4, 5
x = 5 * np.random.randn(N, C, H, W) + 12
gamma = np.random.randn(C)
beta = np.random.randn(C)
dout = np.random.randn(N, C, H, W)

bn_param = {'mode': 'train'}
fx = lambda x: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: spatial_batchnorm_forward(x, a, beta, bn_param)[0]
fb = lambda b: spatial_batchnorm_forward(x, gamma, b, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = spatial_batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = spatial_batchnorm_backward(dout, cache)

print 'dx error: ', rel_error(dx_num, dx)
print 'dgamma error: ', rel_error(da_num, dgamma)
print 'dbeta error: ', rel_error(db_num, dbeta)
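# A minimal sketch of the spatial batchnorm backward pass: fold the spatial
# dimensions into the batch dimension and reuse the vanilla batchnorm_backward.
# This assumes the forward pass did the matching reshape and stored a vanilla
# batchnorm cache; both are assumptions, and the _sketch name is illustrative.
def spatial_batchnorm_backward_sketch(dout, cache):
    N, C, H, W = dout.shape
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)   # (N*H*W, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)  # back to (N, C, H, W)
    return dx, dgamma, dbeta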
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


np.random.seed(231)
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

dx_num = eval_numerical_gradient_array(
    lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

out, cache = max_pool_forward_naive(x, pool_param)
dx = max_pool_backward_naive(dout, cache)

# Your error should be around 1e-12
print('Testing max_pool_backward_naive function:')
print('dx error: ', rel_error(dx, dx_num))
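# A minimal sketch of a naive max-pool backward pass, assuming the forward pass
# caches (x, pool_param): route each upstream gradient to the argmax position
# of its pooling window. Illustrative only; ties send the gradient to every
# maximal element in the window.
import numpy as np

def max_pool_backward_naive_sketch(dout, cache):
    x, pool_param = cache
    N, C, H, W = x.shape
    ph, pw = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    _, _, H_out, W_out = dout.shape

    dx = np.zeros_like(x)
    for n in range(N):
        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    h0, w0 = i * stride, j * stride
                    window = x[n, c, h0:h0 + ph, w0:w0 + pw]
                    mask = (window == window.max())
                    dx[n, c, h0:h0 + ph, w0:w0 + pw] += mask * dout[n, c, i, j]
    return dx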