def test_sgd_momentum():
  """Check a single `sgd_momentum` call against precomputed reference values.

  Evenly spaced 4x5 weights, gradients and velocities are passed through one
  update with a learning rate of 1e-3. The returned weights and the velocity
  stored in the config dict after the call are compared with hard-coded
  expectations.
  """
  rows, cols = 4, 5
  count = rows * cols
  weights = np.linspace(-0.4, 0.6, num=count).reshape(rows, cols)
  grads = np.linspace(-0.6, 0.4, num=count).reshape(rows, cols)
  velocity = np.linspace(0.6, 0.9, num=count).reshape(rows, cols)

  # The velocity check below reads the entry back from this same dict.
  config = {'learning_rate': 1e-3, 'velocity': velocity}
  updated, _ = sgd_momentum(weights, grads, config=config)

  want_weights = np.asarray([
    [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
    [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
    [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
    [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
  ])
  want_velocity = np.asarray([
    [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
    [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
    [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
    [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
  ])

  assert_close(updated, want_weights)
  assert_close(want_velocity, config['velocity'])
Exemple #2
0
def test_sgd_momentum():
    """Regression check for one `sgd_momentum` step on a 4x5 problem.

    The weights, gradient and initial velocity are linearly spaced ramps.
    After one call with a learning rate of 1e-3, the returned weights and
    the ``'velocity'`` entry of the config dict must match the reference
    tables below.
    """
    shape = (4, 5)
    n_values = shape[0] * shape[1]

    def ramp(start, stop):
        # Evenly spaced values from start to stop, reshaped to the test shape.
        return np.linspace(start, stop, num=n_values).reshape(shape)

    params = ramp(-0.4, 0.6)
    grad = ramp(-0.6, 0.4)
    config = {'learning_rate': 1e-3, 'velocity': ramp(0.6, 0.9)}

    stepped, _ = sgd_momentum(params, grad, config=config)

    reference_w = np.asarray([
        [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
        [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
        [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
        [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
    ])
    reference_v = np.asarray([
        [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
        [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
        [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
        [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
    ])

    assert_close(stepped, reference_w)
    assert_close(reference_v, config['velocity'])
Exemple #3
0
# Plot `solver.loss_history` using circle markers ('o' format string).
plt.plot(solver.loss_history, 'o')
# Label the axes first, then title the figure; these calls are independent.
plt.xlabel('Iteration')
plt.ylabel('Training loss')
plt.title('Training loss history')
# Display the finished figure.
plt.show()

from cs231n.optim import sgd_momentum

# Sanity check for `sgd_momentum`: run one update on a small 4x5 problem built
# from evenly spaced values and report how far the result is from precomputed
# reference values.
N, D = 4, 5
w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

# The velocity is read back from this same dict after the call.
config = {'learning_rate': 1e-3, 'velocity': v}
next_w, _ = sgd_momentum(w, dw, config=config)

expected_next_w = np.asarray([
    [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
    [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
    [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
    [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
])
expected_velocity = np.asarray([
    [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
    [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
    [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
    [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
])

# The original used Python 2 print statements, which are a SyntaxError on
# Python 3. Passing one pre-formatted string keeps the call valid on both
# versions; the doubled space reproduces the separator that
# `print 'label: ', value` emitted, so the output text is unchanged.
print('next_w error:  %s' % (rel_error(next_w, expected_next_w),))
print('velocity error:  %s' % (rel_error(expected_velocity, config['velocity']),))
# # SGD+Momentum
# Stochastic gradient descent with momentum is a widely used update rule that tends to make deep networks converge faster than vanilla stochastic gradient descent.
# 
# Open the file `cs231n/optim.py` and read the documentation at the top of the file to make sure you understand the API. Implement the SGD+momentum update rule in the function `sgd_momentum` and run the following to check your implementation. You should see errors less than 1e-8.

# In[ ]:

from cs231n.optim import sgd_momentum

# Check the `sgd_momentum` implementation: apply one update to a 4x5 set of
# evenly spaced weights, gradients and velocities, then print the error
# relative to precomputed reference values.
N, D = 4, 5
w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

# The velocity is read back from this same dict after the call.
config = {'learning_rate': 1e-3, 'velocity': v}
next_w, _ = sgd_momentum(w, dw, config=config)

expected_next_w = np.asarray([
    [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
    [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
    [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
    [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
])
expected_velocity = np.asarray([
    [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
    [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
    [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
    [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
])

# Python 2 print statements do not parse on Python 3. A single formatted
# string argument works on both versions, and the doubled space matches the
# separator the original `print 'label: ', value` produced.
print('next_w error:  %s' % (rel_error(next_w, expected_next_w),))
print('velocity error:  %s' % (rel_error(expected_velocity, config['velocity']),))
# # SGD+Momentum
# Stochastic gradient descent with momentum is a widely used update rule that tends to make deep networks converge faster than vanilla stochastic gradient descent.
#
# Open the file `cs231n/optim.py` and read the documentation at the top of the file to make sure you understand the API. Implement the SGD+momentum update rule in the function `sgd_momentum` and run the following to check your implementation. You should see errors less than 1e-8.

# In[ ]:

from cs231n.optim import sgd_momentum

# Check the `sgd_momentum` implementation: apply one update to a 4x5 set of
# evenly spaced weights, gradients and velocities, then print the error
# relative to precomputed reference values.
N, D = 4, 5
w_ = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
v_ = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

config = {'learning_rate': 1e-3, 'velocity': v_}
# `config1` is the config handed back by the update call.
next_w, config1 = sgd_momentum(w_, dw, config=config)

expected_next_w = np.asarray([
    [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
    [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
    [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
    [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
])
expected_velocity = np.asarray([
    [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
    [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
    [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
    [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
])
print('sgd_moment evaluation')
print('next_w error: ', rel_error(next_w, expected_next_w))
# Compare against the velocity in the *returned* config. The original bound
# `config1` but then inspected the input dict, so the check only held when the
# update mutated its argument in place.
# NOTE(review): assumes the returned config carries 'velocity' - confirm
# against the update-rule API.
print('velocity error: ', rel_error(expected_velocity, config1['velocity']))