Example #1
def Test_Affine_Backward():
    # Test the affine_backward function
    np.random.seed(123)
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    mp_x = NumpyVarToMinpy(x)
    mp_w = NumpyVarToMinpy(w)
    mp_b = NumpyVarToMinpy(b)

    dx_num = eval_numerical_gradient_array(lambda x: MinpyVarToNumpy(affine_forward(NumpyVarToMinpy(x), mp_w, mp_b)[0]), x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: MinpyVarToNumpy(affine_forward(NumpyVarToMinpy(x), NumpyVarToMinpy(w), mp_b)[0]), w, dout)
    db_num = eval_numerical_gradient_array(lambda b: MinpyVarToNumpy(affine_forward(NumpyVarToMinpy(x), mp_w, NumpyVarToMinpy(b))[0]), b, dout)

    _, cache = affine_forward(mp_x, mp_w, mp_b)
    mp_dout = NumpyVarToMinpy(dout)
    mp_dx, mp_dw, mp_db = affine_backward(mp_dout, cache)

    dx = MinpyVarToNumpy(mp_dx)
    dw = MinpyVarToNumpy(mp_dw)
    db = MinpyVarToNumpy(mp_db)

    # The error should be around 1e-10
    print 'Testing affine_backward function:'
    print 'dx: ', dx
    print 'dw: ', dw
    print 'db: ', db
    print 'dx error: ', rel_error(dx_num, dx)
    print 'dw error: ', rel_error(dw_num, dw)
    print 'db error: ', rel_error(db_num, db)
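Every check below leans on eval_numerical_gradient_array for centered-difference gradient estimates. The helper sketched here is one minimal way to realize the (f, x, df, h) signature used throughout: it perturbs each entry of x in place and accumulates sum((f(x+h) - f(x-h)) * df) / (2h). The name and details are illustrative; the course's own cs231n/gradient_check.py may differ.

import numpy as np

def eval_numerical_gradient_array_sketch(f, x, df, h=1e-5):
    # Centered-difference estimate of the gradient of sum(f(x) * df) w.r.t. x.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h           # perturb in place
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval               # restore
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad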
Example #2
def test_affine_backward():
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

    _, cache = affine_forward(x, w, b)
    dx, dw, db = affine_backward(dout, cache)

    assert dx.shape == dx_num.shape
    assert dw.shape == dw_num.shape
    assert db.shape == db_num.shape

    assert rel_error(dx_num,dx) < 5e-7
    assert rel_error(dw_num,dw) < 5e-7
    assert rel_error(db_num,db) < 5e-7
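For reference, a minimal affine_forward / affine_backward pair consistent with the shapes used in these tests (x of shape (N, d_1, ..., d_k) flattened to (N, D), w of shape (D, M)). The `_sketch` names and the cache = (x, w, b) layout are illustrative, not the course's actual implementation.

import numpy as np

def affine_forward_sketch(x, w, b):
    # Flatten each example to a row vector, then out = x_flat @ w + b.
    out = x.reshape(x.shape[0], -1).dot(w) + b
    cache = (x, w, b)
    return out, cache

def affine_backward_sketch(dout, cache):
    x, w, b = cache
    x_flat = x.reshape(x.shape[0], -1)
    dx = dout.dot(w.T).reshape(x.shape)   # restore the original input shape
    dw = x_flat.T.dot(dout)
    db = dout.sum(axis=0)
    return dx, dw, db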
Example #3
def test_affine_backwards():
  # Test the affine_backward function

  x = np.random.randn(10, 2, 3)
  w = np.random.randn(6, 5)
  b = np.random.randn(5)
  dout = np.random.randn(10, 5)

  dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
  dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
  db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

  _, cache = affine_forward(x, w, b)
  dx, dw, db = affine_backward(dout, cache)
  assert_close(dx, dx_num)
  assert_close(dw, dw_num)
  assert_close(db, db_num)
  # The error should be around 1e-10
  print 'Testing affine_backward function:'
  print db.shape, db_num.shape
Example #4
def test_relu_backwards():
  x = np.random.randn(10, 10)
  dout = np.random.randn(*x.shape)

  dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

  _, cache = relu_forward(x)
  dx = relu_backward(dout, cache)

  # The error should be around 1e-12
  print 'Testing relu_backward function:'
  print 'dx error: ', assert_close(dx_num, dx)
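A minimal relu_forward / relu_backward pair that would pass this kind of check (illustrative `_sketch` names; the cached value is simply the input):

import numpy as np

def relu_forward_sketch(x):
    out = np.maximum(0, x)
    cache = x
    return out, cache

def relu_backward_sketch(dout, cache):
    x = cache
    dx = dout * (x > 0)   # gradient flows only where the input was positive
    return dx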
Example #5
def test_batch_norm_backwards():
  # Gradient check batchnorm backward pass

  N, D = 4, 5
  x = 5 * np.random.randn(N, D) + 12
  gamma = np.random.randn(D)
  beta = np.random.randn(D)
  dout = np.random.randn(N, D)

  bn_param = {'mode': 'train'}
  fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
  fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
  fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

  dx_num = eval_numerical_gradient_array(fx, x, dout)
  da_num = eval_numerical_gradient_array(fg, gamma, dout)
  db_num = eval_numerical_gradient_array(fb, beta, dout)

  _, cache = batchnorm_forward(x, gamma, beta, bn_param)
  dx, dgamma, dbeta = batchnorm_backward(dout, cache)
  print 'dgamma error: ', assert_close(da_num, dgamma)
  print 'dbeta error: ', assert_close(db_num, dbeta)
  print 'dx error: ', assert_close(dx_num, dx)
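For context, a train-mode batchnorm_forward / batchnorm_backward sketch using the standard computational-graph derivation. Running averages and test mode are omitted, and the names and cache layout are illustrative assumptions, not the course's actual code.

import numpy as np

def batchnorm_forward_sketch(x, gamma, beta, bn_param):
    eps = bn_param.get('eps', 1e-5)
    mu = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mu) / np.sqrt(var + eps)
    out = gamma * x_hat + beta
    cache = (x, x_hat, mu, var, gamma, eps)
    return out, cache

def batchnorm_backward_sketch(dout, cache):
    x, x_hat, mu, var, gamma, eps = cache
    N = x.shape[0]
    inv_std = 1.0 / np.sqrt(var + eps)
    dgamma = np.sum(dout * x_hat, axis=0)
    dbeta = np.sum(dout, axis=0)
    dx_hat = dout * gamma
    dvar = np.sum(dx_hat * (x - mu) * -0.5 * inv_std**3, axis=0)
    dmu = np.sum(-dx_hat * inv_std, axis=0) + dvar * np.mean(-2.0 * (x - mu), axis=0)
    dx = dx_hat * inv_std + dvar * 2.0 * (x - mu) / N + dmu / N
    return dx, dgamma, dbeta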
Example #6
def test_sandwich_layers(samples=random.randrange(1, 10)):
    for _ in range(samples):
        x = np.random.randn(2, 3, 4)
        w = np.random.randn(12, 10)
        b = np.random.randn(10)

        dout = np.random.randn(2, 10)
        #need an input for gradient being backpropagated into this layer

        out, cache = affine_relu_forward(x, w, b)
        dx, dw, db = affine_relu_backward(dout, cache)

        dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
        db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

        assert rel_error(dx_num, dx) < 5e-7
        assert rel_error(dw_num, dw) < 5e-7
        assert rel_error(db_num, db) < 5e-7

        assert dx.shape == dx_num.shape
        assert dw.shape == dw_num.shape
        assert db.shape == db_num.shape
        assert out.shape == dout.shape
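The sandwich layer under test is just a composition of the affine and ReLU layers. A sketch of affine_relu_forward / affine_relu_backward, assuming the affine_forward, affine_backward, relu_forward, and relu_backward functions exercised above are available; the `_sketch` names are illustrative.

def affine_relu_forward_sketch(x, w, b):
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)

def affine_relu_backward_sketch(dout, cache):
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    return affine_backward(da, fc_cache)   # (dx, dw, db)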
Example #7
def Relu_Backward():
  np.random.seed(123)
  x = np.random.randn(10, 10)
  dout = np.random.randn(*x.shape)
  
  dx_num = eval_numerical_gradient_array(lambda x: MinpyVarToNumpy(relu_forward(NumpyVarToMinpy(x))[0]), x, dout)
  
  mp_x = NumpyVarToMinpy(x)
  mp_dout = NumpyVarToMinpy(dout)
  _, cache = relu_forward(mp_x)
  mp_dx = relu_backward(mp_dout, cache)
  dx = MinpyVarToNumpy(mp_dx)

  # The error should be around 1e-12
  print 'Testing relu_backward function:'
  print 'dx', dx
  print 'dx_num', dx_num
  print 'dx error: ', rel_error(dx_num, dx)
Example #8
  print 'Running tests with p = ', p
  print 'Mean of input: ', x.mean()
  print 'Mean of train-time output: ', out.mean()
  print 'Mean of test-time output: ', out_test.mean()
  print 'Fraction of train-time output set to zero: ', (out == 0).mean()
  print 'Fraction of test-time output set to zero: ', (out_test == 0).mean()
  print

x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print 'dx relative error: ', rel_error(dx, dx_num)
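A possible inverted-dropout implementation consistent with this check. Here p is treated as the keep probability and the mask is scaled by 1/p at train time so no rescaling is needed at test time; the course's convention for p may differ, so treat this as an assumption, and the names are illustrative.

import numpy as np

def dropout_forward_sketch(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p   # inverted dropout
        out = x * mask
    else:
        mask = None
        out = x
    return out, (dropout_param, mask)

def dropout_backward_sketch(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout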

# Train two identical nets, one with dropout and one without
num_train = 500
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

solvers = {}
dropout_choices = [0, 0.5]
for dropout in dropout_choices:
Example #9
out, _ = word_embedding_forward(x, W)
expected_out = np.asarray([[[0., 0.07142857, 0.14285714],
                            [0.64285714, 0.71428571, 0.78571429],
                            [0.21428571, 0.28571429, 0.35714286],
                            [0.42857143, 0.5, 0.57142857]],
                           [[0.42857143, 0.5, 0.57142857],
                            [0.21428571, 0.28571429, 0.35714286],
                            [0., 0.07142857, 0.14285714],
                            [0.64285714, 0.71428571, 0.78571429]]])

print('out error: ', rel_error(expected_out, out))

################################################################################################################################################
print("\nword backward\n")
################################################################################################################################################

np.random.seed(231)

N, T, V, D = 50, 3, 5, 6
x = np.random.randint(V, size=(N, T))
W = np.random.randn(V, D)

out, cache = word_embedding_forward(x, W)
dout = np.random.randn(*out.shape)
dW = word_embedding_backward(dout, cache)

f = lambda W: word_embedding_forward(x, W)[0]
dW_num = eval_numerical_gradient_array(f, W, dout)

print('dW error: ', rel_error(dW, dW_num))
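A minimal word_embedding_forward / word_embedding_backward pair matching these shapes: each integer in x selects a row of W, and the backward pass scatter-adds dout into dW so repeated words accumulate their gradients (illustrative names and cache layout):

import numpy as np

def word_embedding_forward_sketch(x, W):
    out = W[x]                    # (N, T) indices -> (N, T, D) embeddings
    cache = (x, W.shape)
    return out, cache

def word_embedding_backward_sketch(dout, cache):
    x, W_shape = cache
    dW = np.zeros(W_shape)
    np.add.at(dW, x, dout)        # scatter-add; repeated indices accumulate
    return dW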
Example #10
print 'difference: ', rel_error(out, correct_out)


# # Affine layer: backward
# Now implement the `affine_backward` function and test your implementation using numeric gradient checking.

# In[ ]:

# Test the affine_backward function

x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around 1e-10
print 'Testing affine_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)


# # ReLU layer: forward
# Implement the forward pass for the ReLU activation function in the `relu_forward` function and test your implementation using the following:
Example #11
import numpy as np
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

x = np.random.randn(5, 6)
w = np.random.randn(6, 7)
dout = np.random.randn(5, 7)
bn_param = {'mode': 'train'}
gamma = np.random.randn(7)
beta = np.random.randn(7)

out, cache = aff_bn_relu_forward(x, w, gamma, beta, bn_param)
dx, dw, dgamma, dbeta = aff_bn_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: aff_bn_relu_forward(x, w, gamma, beta,
                                                                     bn_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: aff_bn_relu_forward(x, w, gamma, beta,
                                                                     bn_param)[0], w, dout)
dgamma_num = eval_numerical_gradient_array(lambda gamma: aff_bn_relu_forward(x, w, gamma, beta,
                                                                     bn_param)[0], gamma, dout)
dbeta_num = eval_numerical_gradient_array(lambda beta: aff_bn_relu_forward(x, w, gamma, beta,
                                                                     bn_param)[0], beta, dout)
print 'Testing aff_bn_relu:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'dgamma error: ', rel_error(dgamma_num, dgamma)
print 'dbeta error: ', rel_error(dbeta_num, dbeta)
Example #12
  print 'Fraction of train-time output set to zero: ', (out == 0).mean()
  print 'Fraction of test-time output set to zero: ', (out_test == 0).mean()
  print





# Dropout backward pass
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print 'dx relative error: ', rel_error(dx, dx_num)




# Fully-connected nets with Dropout
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for dropout in [0, 0.25, 0.5]:
  print 'Running check with dropout = ', dropout
  model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                            weight_scale=5e-2, dtype=np.float64,
Example #13
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
print out
print '\nTesting affine_forward function:'
print 'difference: ', rel_error(out, correct_out)

# Test the affine_backward function

x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be less than 1e-10
print '\nTesting affine_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)

# Test the relu_forward function

x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

Example #14
# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
# 
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[6]:

x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-9
print 'Testing conv_backward_naive function'
print 'dx error: ', rel_error(dx, dx_num)

print 'dw error: ', rel_error(dw, dw_num)
print 'db error: ', rel_error(db, db_num)
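One straightforward (and deliberately slow) way to write conv_backward_naive, assuming the forward pass cached (x, w, b, conv_param) and used zero padding. The nested loops mirror the forward convolution and accumulate dw and dx window by window; the name and cache layout are illustrative assumptions.

import numpy as np

def conv_backward_naive_sketch(dout, cache):
    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))
    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x_pad[n, :, hs:hs + HH, ws:ws + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, hs:hs + HH, ws:ws + WW] += w[f] * dout[n, f, i, j]
    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db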


# # Max pooling: Naive forward
Example #15
def main():
    # The torchvision.transforms package provides tools for preprocessing data
    # and for performing data augmentation; here we set up a transform to
    # preprocess the data by subtracting the mean RGB value and dividing by the
    # standard deviation of each RGB value; we've hardcoded the mean and std.
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # We set up a Dataset object for each split (train / val / test); Datasets load
    # training examples one at a time, so we wrap each Dataset in a DataLoader which
    # iterates through the Dataset and forms minibatches. We divide the CIFAR-10
    # training set into train and val sets by passing a Sampler object to the
    # DataLoader telling how it should sample from the underlying Dataset.
    # cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
    #                             transform=transform)
    # loader_train = DataLoader(cifar10_train, batch_size=64,
    #                         sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

    cifar10_val = dset.CIFAR10('./cs231n/datasets',
                               train=True,
                               download=True,
                               transform=transform)
    loader_val = DataLoader(cifar10_val,
                            batch_size=64,
                            sampler=sampler.SubsetRandomSampler(
                                range(NUM_TRAIN, 50000)))

    # cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True,
    #                             transform=transform)
    # loader_test = DataLoader(cifar10_test, batch_size=64)
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    #train()

    np.random.seed(231)
    N, C, H, W = 2, 6, 4, 5
    G = 2
    D = C // G
    x = 5 * np.random.randn(N, C, H, W) + 12
    gamma = np.random.randn(1, C, 1, 1)
    beta = np.random.randn(1, C, 1, 1)
    dout = np.random.randn(N, C, H, W)

    # x_reshaped = x.reshape((N*G, H*W*D)).T
    # fn = lambda n: fake_groupnorm_forward(x_reshaped)[0]
    # #cache = {'gamma': gamma}
    # dmean = np.ones((1, N*G))
    # dn_num = eval_numerical_gradient_array(fn, x_reshaped, dmean)
    # mean, cache = fake_groupnorm_forward(x_reshaped)
    # dx_reshaped = fake_groupnorm_backward(dmean, cache)

    # You should expect errors of magnitude between 1e-12 and 1e-07
    gn_param = {}
    fx = lambda x: spatial_groupnorm_forward(x, gamma, beta, G, gn_param)[0]
    fg = lambda a: spatial_groupnorm_forward(x, a, beta, G, gn_param)[0]
    fb = lambda b: spatial_groupnorm_forward(x, gamma, b, G, gn_param)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dout)
    da_num = eval_numerical_gradient_array(fg, gamma, dout)
    db_num = eval_numerical_gradient_array(fb, beta, dout)

    _, cache = spatial_groupnorm_forward(x, gamma, beta, G, gn_param)
    dx, dgamma, dbeta = spatial_groupnorm_backward(dout, cache)
    print('dx error: ', rel_error(dx_num, dx))
    print('dgamma error: ', rel_error(da_num, dgamma))
    print('dbeta error: ', rel_error(db_num, dbeta))
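For reference, a forward-only sketch of spatial group normalization: the channels are split into G groups and each (sample, group) slice is normalized over its channel and spatial dimensions. The backward pass follows the same derivation as batchnorm applied per group; names and cache layout here are illustrative assumptions.

import numpy as np

def spatial_groupnorm_forward_sketch(x, gamma, beta, G, gn_param):
    eps = gn_param.get('eps', 1e-5)
    N, C, H, W = x.shape
    xg = x.reshape(N, G, C // G, H, W)
    mu = xg.mean(axis=(2, 3, 4), keepdims=True)
    var = xg.var(axis=(2, 3, 4), keepdims=True)
    x_hat = ((xg - mu) / np.sqrt(var + eps)).reshape(N, C, H, W)
    out = gamma * x_hat + beta        # gamma, beta broadcast from shape (1, C, 1, 1)
    cache = (x_hat, gamma, mu, var, eps, G)
    return out, cache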
Example #16
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[14]:

# Load from matfile the parameters
dictMat = scipy.io.loadmat('../../../test/layers/conv_backward_cs231n.mat')
x = dictMat['x']
w = dictMat['w']
b = dictMat['b']
dout = dictMat['dout']

conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

# Test the conv with im2col version of forward/backward propagation
out, cache = conv_forward_im2col(x, w, b, conv_param)
dx, dw, db = conv_backward_im2col(dout, cache)

# Your errors should be around 1e-9
print('Testing conv_backward_im2col function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))
Example #17
out, _ = affine_forward(x, w, b)
correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                        [3.25553199, 3.5141327, 3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x,
                                       dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w,
                                       dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b,
                                       dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

# Test the relu_forward function
Example #18
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_step_forward(x, h, Wx, Wh, b)

dnext_h = np.random.randn(*out.shape)

fx = lambda x: rnn_step_forward(x, h, Wx, Wh, b)[0]
fh = lambda prev_h: rnn_step_forward(x, prev_h, Wx, Wh, b)[0]
fWx = lambda Wx: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWh = lambda Wh: rnn_step_forward(x, h, Wx, Wh, b)[0]
fb = lambda b: rnn_step_forward(x, h, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dnext_h)
dprev_h_num = eval_numerical_gradient_array(fh, h, dnext_h)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dnext_h)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dnext_h)
db_num = eval_numerical_gradient_array(fb, b, dnext_h)

dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dprev_h error: ', rel_error(dprev_h_num, dprev_h))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))
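A minimal rnn_step_forward / rnn_step_backward pair for the vanilla RNN step next_h = tanh(x Wx + prev_h Wh + b); caching next_h makes the tanh backward a one-liner (illustrative `_sketch` names and cache layout):

import numpy as np

def rnn_step_forward_sketch(x, prev_h, Wx, Wh, b):
    next_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
    cache = (x, prev_h, Wx, Wh, next_h)
    return next_h, cache

def rnn_step_backward_sketch(dnext_h, cache):
    x, prev_h, Wx, Wh, next_h = cache
    da = dnext_h * (1 - next_h ** 2)   # backprop through tanh
    dx = da.dot(Wx.T)
    dprev_h = da.dot(Wh.T)
    dWx = x.T.dot(da)
    dWh = prev_h.T.dot(da)
    db = da.sum(axis=0)
    return dx, dprev_h, dWx, dWh, db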

# # Vanilla RNN: forward
# Now that you have implemented the forward and backward passes for a single timestep of a vanilla RNN, you will combine these pieces to implement an RNN that processes an entire sequence of data.
Example #19
print_mean_std(a_norm, axis=0)

# Gradient check batchnorm backward pass
np.random.seed(231)
N, D = 4, 5
x = 5 * np.random.randn(N, D) + 12
gamma = np.random.randn(D)
beta = np.random.randn(D)
dout = np.random.randn(N, D)

bn_param = {'mode': 'train'}
fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma.copy(), dout)
db_num = eval_numerical_gradient_array(fb, beta.copy(), dout)

_, cache = batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = batchnorm_backward(dout, cache)
#You should expect to see relative errors between 1e-13 and 1e-8
print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))

np.random.seed(231)
N, D = 100, 500
x = 5 * np.random.randn(N, D) + 12
gamma = np.random.randn(D)
beta = np.random.randn(D)
Example #20
# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.

# In[ ]:

np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2, )
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

# # Max pooling: Naive forward
Example #21
Wh = np.random.randn(H, 4 * H)
b = np.random.randn(4 * H)

out, cache = lstm_forward(x, h0, Wx, Wh, b)

dout = np.random.randn(*out.shape)

dx, dh0, dWx, dWh, db = lstm_backward(dout, cache)

fx = lambda x: lstm_forward(x, h0, Wx, Wh, b)[0]
fh0 = lambda h0: lstm_forward(x, h0, Wx, Wh, b)[0]
fWx = lambda Wx: lstm_forward(x, h0, Wx, Wh, b)[0]
fWh = lambda Wh: lstm_forward(x, h0, Wx, Wh, b)[0]
fb = lambda b: lstm_forward(x, h0, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
db_num = eval_numerical_gradient_array(fb, b, dout)

print 'dx error: ', rel_error(dx_num, dx)
print 'dh0 error: ', rel_error(dh0_num, dh0)
print 'dWx error: ', rel_error(dWx_num, dWx)
print 'dWh error: ', rel_error(dWh_num, dWh)
print 'db error: ', rel_error(db_num, db)

# #LSTM captioning model
# Now that you have implemented an LSTM, update the implementation of the `loss` method of the `CaptioningRNN` class in the file `cs231n/classifiers/rnn.py` to handle the case where `self.cell_type` is `lstm`. This should require adding less than 10 lines of code.
#
# Once you have done so, run the following to check your implementation. You should see a difference of less than `1e-10`.
Example #22
File: cnn_dr.py  Project: ankit2802/CS231N
print '  Stds: ', out.std(axis=(0, 2, 3))

# Means should be close to beta and stds close to gamma
gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print 'After spatial batch normalization (nontrivial gamma, beta):'
print '  Shape: ', out.shape
print '  Means: ', out.mean(axis=(0, 2, 3))
print '  Stds: ', out.std(axis=(0, 2, 3))
#Spatial batch norm in backpass:
N, C, H, W = 2, 3, 4, 5
x = 5 * np.random.randn(N, C, H, W) + 12
gamma = np.random.randn(C)
beta = np.random.randn(C)
dout = np.random.randn(N, C, H, W)

bn_param = {'mode': 'train'}
fx = lambda x: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: spatial_batchnorm_forward(x, a, beta, bn_param)[0]
fb = lambda b: spatial_batchnorm_forward(x, gamma, b, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = spatial_batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = spatial_batchnorm_backward(dout, cache)
print 'dx error: ', rel_error(dx_num, dx)
print 'dgamma error: ', rel_error(da_num, dgamma)
print 'dbeta error: ', rel_error(db_num, dbeta)
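Spatial batchnorm can reuse the vanilla batchnorm functions checked earlier by folding the spatial dimensions into the batch dimension. A sketch, assuming batchnorm_forward / batchnorm_backward are available; the `_sketch` names are illustrative.

def spatial_batchnorm_forward_sketch(x, gamma, beta, bn_param):
    N, C, H, W = x.shape
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)     # (N*H*W, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache

def spatial_batchnorm_backward_sketch(dout, cache):
    N, C, H, W = dout.shape
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta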
Example #23
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


np.random.seed(231)
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

dx_num = eval_numerical_gradient_array(
    lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

out, cache = max_pool_forward_naive(x, pool_param)
dx = max_pool_backward_naive(dout, cache)

# Your error should be around 1e-12
print('Testing max_pool_backward_naive function:')
print('dx error: ', rel_error(dx, dx_num))
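Finally, a naive max-pooling pair consistent with this check: the forward pass takes the max over each window, and the backward pass routes each upstream gradient to the location of that window's maximum (illustrative `_sketch` names; ties simply go to the first maximum found).

import numpy as np

def max_pool_forward_naive_sketch(x, pool_param):
    N, C, H, W = x.shape
    ph, pw, s = pool_param['pool_height'], pool_param['pool_width'], pool_param['stride']
    H_out, W_out = 1 + (H - ph) // s, 1 + (W - pw) // s
    out = np.zeros((N, C, H_out, W_out))
    for i in range(H_out):
        for j in range(W_out):
            out[:, :, i, j] = x[:, :, i * s:i * s + ph, j * s:j * s + pw].max(axis=(2, 3))
    return out, (x, pool_param)

def max_pool_backward_naive_sketch(dout, cache):
    x, pool_param = cache
    N, C, H, W = x.shape
    ph, pw, s = pool_param['pool_height'], pool_param['pool_width'], pool_param['stride']
    H_out, W_out = dout.shape[2], dout.shape[3]
    dx = np.zeros_like(x)
    for n in range(N):
        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    window = x[n, c, i * s:i * s + ph, j * s:j * s + pw]
                    m = np.unravel_index(window.argmax(), window.shape)
                    dx[n, c, i * s + m[0], j * s + m[1]] += dout[n, c, i, j]
    return dx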