def Debug():
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for reg in [0, 3.14]:
        print('Running check with reg = ', reg)
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  reg=reg,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  conv_mode='numpy')

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)
        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name].asnumpy(),
                                               verbose=False,
                                               h=1e-5)
            print('%s relative error: %.2e' %
                  (name, rel_error(grad_num, grads[name])))
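
# All of the checks in this collection compare a numerically estimated gradient
# against the analytic one with a small rel_error helper. One of the later
# examples defines it as follows; it is reproduced here for reference.
import numpy as np

def rel_error(x, y):
    """Returns the maximum elementwise relative error between x and y."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))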
Example n. 2
def check_fc_net_with_batch_normalization():
    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    # You should expect relative errors between 1e-4 and 1e-10 for W,
    # between 1e-08 and 1e-10 for b,
    # and between 1e-08 and 1e-09 for beta and gamma.
    for reg in [0, 3.14]:
        print('Running check with reg = ', reg)
        model = FullyConnectedNet(hidden_dims=[H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  reg=reg,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  normalization='batchnorm')
        loss, grads = model.loss(X, y)
        print('Initial loss:', loss)

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print('%s relative error: %.2e' %
                  (name, rel_error(grad_num, grads[name])))
        if reg == 0:
            print()
Example n. 3
def check_gradient():
    num_inputs = 2
    input_dim = (3, 16, 16)
    reg = 0.0
    num_classes = 10
    np.random.seed(231)
    X = np.random.randn(num_inputs, *input_dim)
    y = np.random.randint(num_classes, size=num_inputs)

    model = ThreeLayerConvNet(num_filters=3,
                              filter_size=3,
                              input_dim=input_dim,
                              hidden_dim=7,
                              dtype=np.float64)
    loss, grads = model.loss(X, y)
    # Errors should be small, but correct implementations may have
    # relative errors up to the order of e-2
    for param_name in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = eval_numerical_gradient(f,
                                                 model.params[param_name],
                                                 verbose=False,
                                                 h=1e-6)
        e = rel_error(param_grad_num, grads[param_name])
        print('%s max relative error: %e' % (param_name, e))
Example n. 4
def check_dropout_with_fully_connected_net():
    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for dropout in [1, 0.9, 0.75, 0.5, 0.25]:
        print('Running check with dropout = ', dropout)
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  dropout=dropout,
                                  seed=123)

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        # Relative errors should be around e-6 or less; note that for
        # dropout=1 it is fine if the W2 error is on the order of e-5.
        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print('%s relative error: %.2e' %
                  (name, rel_error(grad_num, grads[name])))
        print()
Example n. 6
def fc_net_test():
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for reg in [0, 3.14]:
        print('Running check with reg = {}'.format(reg))
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  reg=reg,
                                  weight_scale=5e-2,
                                  dtype=np.float64)

        loss, grads = model.loss(X, y)
        print('Initial loss: {}'.format(loss))

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print('{} relative error: {}'.format(
                name, rel_error(grad_num, grads[name])))
Example n. 7
def check_gradient_on_captioning_rnn():
    """
    Perform numeric gradient checking on the CaptioningRNN class; you should
    see errors on the order of e-6 or less.
    """
    np.random.seed(231)

    batch_size = 2
    timesteps = 3
    input_dim = 4
    wordvec_dim = 5
    hidden_dim = 6
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    vocab_size = len(word_to_idx)

    captions = np.random.randint(vocab_size, size=(batch_size, timesteps))
    features = np.random.randn(batch_size, input_dim)

    model = CaptioningRNN(word_to_idx,
                          input_dim=input_dim,
                          wordvec_dim=wordvec_dim,
                          hidden_dim=hidden_dim,
                          cell_type='rnn',
                          dtype=np.float64)

    loss, grads = model.loss(features, captions)

    for param_name in sorted(grads):
        f = lambda _: model.loss(features, captions)[0]
        param_grad_num = eval_numerical_gradient(f,
                                                 model.params[param_name],
                                                 verbose=False,
                                                 h=1e-6)
        e = rel_error(param_grad_num, grads[param_name])
        print('%s relative error: %e' % (param_name, e))
Example n. 8
def fully_connected_nets_with_dropout():
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    for dropout in [0, 0.25, 0.5]:
        print("Running check with dropout={}".format(dropout))
        model = FullyConnectedNet([H1, H2],
                                  input_dim=D,
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  dtype=np.float64,
                                  dropout=dropout,
                                  seed=123)
        loss, grads = model.loss(X, y)
        print("Initial loss: {}".format(loss))

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f,
                                               model.params[name],
                                               verbose=False,
                                               h=1e-5)
            print("{} relative error: {}".format(
                name, rel_error(grad_num, grads[name])))
Example n. 9
def test_twolayerNN():
    N, D, H, C = 3, 5, 50, 7
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    std = 1e-2
    model = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C, weight_scale=std)

    print('Testing initialization ... ')
    W1_std = abs(model.params['W1'].std() - std)
    b1 = model.params['b1']
    W2_std = abs(model.params['W2'].std() - std)
    b2 = model.params['b2']
    assert W1_std < std / 10, 'First layer weights do not seem right'
    assert np.all(b1 == 0), 'First layer biases do not seem right'
    assert W2_std < std / 10, 'Second layer weights do not seem right'
    assert np.all(b2 == 0), 'Second layer biases do not seem right'

    print('Testing test-time forward pass ... ')
    model.params['W1'] = np.linspace(-0.7, 0.3, num=D*H).reshape(D, H)
    model.params['b1'] = np.linspace(-0.1, 0.9, num=H)
    model.params['W2'] = np.linspace(-0.3, 0.4, num=H*C).reshape(H, C)
    model.params['b2'] = np.linspace(-0.9, 0.1, num=C)
    X = np.linspace(-5.5, 4.5, num=N*D).reshape(D, N).T
    scores = model.loss(X)
    correct_scores = np.asarray(
      [[11.53165108,  12.2917344,   13.05181771,  13.81190102,  14.57198434, 15.33206765,  16.09215096],
       [12.05769098,  12.74614105,  13.43459113,  14.1230412,   14.81149128, 15.49994135,  16.18839143],
       [12.58373087,  13.20054771,  13.81736455,  14.43418138,  15.05099822, 15.66781506,  16.2846319 ]])
    scores_diff = np.abs(scores - correct_scores).sum()

    assert scores.shape == correct_scores.shape
    assert scores_diff < 1e-6, 'Problem with test-time forward pass'

    print('Testing training loss (no regularization)')
    y = np.asarray([0, 5, 1])
    loss, grads = model.loss(X, y)
    correct_loss = 3.4702243556
    assert abs(loss - correct_loss) < 1e-10, 'Problem with training-time loss'

    model.reg = 1.0
    loss, grads = model.loss(X, y)
    correct_loss = 26.5948426952
    assert abs(loss - correct_loss) < 1e-10, 'Problem with regularization loss'

    for reg in [0.0, 0.7]:
        print('Running numeric gradient check with reg = ', reg)
        model.reg = reg
        loss, grads = model.loss(X, y)

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
            print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))
            assert rel_error(grad_num,grads[name]) < 1e-6
            assert grad_num.shape == grads[name].shape
            assert grads[name].shape == model.params[name].shape
Example n. 11
def test_svm(num_classes, samples=random.randrange(1,10)):
    num_classes, num_inputs = num_classes, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
    loss, dx = svm_loss(x, y)
    assert dx_num.shape == dx.shape
    assert loss > num_classes * ( 1 - 1.2 / num_classes)
    assert rel_error(dx_num, dx) < 5e-7
Example n. 13
def test_toy_model_grad(init_toy_model, init_toy_data):
    net  = init_toy_model
    X, y = init_toy_data

    loss, grads = net.loss(X, y, reg=0.1)

    # these should all be less than 1e-8 or so
    for param_name in grads:
        f = lambda W: net.loss(X, y, reg=0.1)[0]
        param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
        assert rel_error(param_grad_num, grads[param_name]) < 5e-7
Example n. 15
def loss_layer_test():
    num_classes, num_inputs = 10, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0],
                                     x,
                                     verbose=False)
    loss, dx = svm_loss(x, y)

    print("Testing svm_loss:")
    print("loss: {}".format(loss))
    print("dx error: {}".format(rel_error(dx_num, dx)))

    dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                     x,
                                     verbose=False)
    loss, dx = softmax_loss(x, y)

    print("Testing softmax loss")
    print("loss: {}".format(loss))
    print("dx error: {}".format(rel_error(dx_num, dx)))
Example n. 16
def gradient_check(model, X, y):
    # num_inputs = 2
    # input_dim = (3, 32, 32)
    # reg = 0.0
    # num_classes = 10
    # X = np.random.randn(num_inputs, *input_dim)
    # y = np.random.randint(num_classes, size=num_inputs)
    #
    # model = FlexNet(input_dim=input_dim, num_filters=(4,), hidden_dim=(10,), reg=reg, dtype=np.float64)
    # model.print_params()
    #
    # # Train a bit before grad check
    # model = overfit_small_data(model, epochs=4, verbose=False)
    #
    #model.loss_scale = 1e4
    #model.compute_hashes = True

    # TODO functional model
    # TODO check individual parts?
    # TODO check fewer dimensions
    # TODO test without reg and only reg
    # TODO try multiple h

    print('\n--- Gradient check ---')
    loss, grads = model.loss(X, y)
    results = {}
    avg = {}
    h = 1e-6
    for param_name in sorted(grads):

        def f(_):
            out = model.loss(X, y)
            return out[0]

        param_grad_num = eval_numerical_gradient(f,
                                                 model.params[param_name],
                                                 verbose=False,
                                                 h=h)
        avg[param_name] = np.mean(np.abs(grads[param_name])), np.mean(
            np.abs(param_grad_num))
        results[param_name] = rel_error(param_grad_num, grads[param_name])

    sys.stdout.flush()
    print('Max relative error:   (h = {})'.format(h))
    print('{:<20} {:<13} {:<15}           {:<13} {:<13}'.format(
        'Param', 'Error', '', 'Ana', 'Num'))
    for p in sorted(results):
        msg = gradient_check_message(results[p])
        print('{:<20} {:<13e} {:<15}   avgval: {:<13e} {:<13e}'.format(
            p, results[p], msg, avg[p][0], avg[p][1]))
Example n. 17
def Test_loss():
    num_classes, num_inputs = 10, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(
        lambda x: svm_loss_forward(x, y)[0].asnumpy(), x, verbose=False)
    loss, cache = svm_loss_forward(x, y)
    dx = svm_loss_backward(cache)

    # Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
    print('Testing svm_loss:')
    print('loss: ', loss)
    print('dx error: ', rel_error(dx_num, dx.asnumpy()))

    dx_num = eval_numerical_gradient(
        lambda x: softmax_loss_forward(x, y)[0].asnumpy(), x, verbose=False)
    loss, cache = softmax_loss_forward(x, y)
    dx = softmax_loss_backward(cache)

    # Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
    print('\nTesting softmax_loss:')
    print('loss: ', loss)
    print('dx error: ', rel_error(dx_num, dx.asnumpy()))
Example n. 18
def test_softmax_loss_vectorized_numerical_gradient(sample_train, train_count, reg=0.0):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[1]

    grad_analytic = g(W)
    param_grad_num = eval_numerical_gradient(f, W, verbose=False, h=1e-7)
    assert rel_error(param_grad_num, grad_analytic) < 1e-4
Example n. 19
def check_temporal_softmax_loss():
    """
    In an RNN language model, at every timestep we produce a score for each word in the
    vocabulary. We know the ground-truth word at each timestep, so we use a softmax loss
    function to compute loss and gradient at each timestep. We sum the losses over time
    and average them over the minibatch.

    However there is one wrinkle: since we operate over minibatches and different captions
    may have different lengths, we append <NULL> tokens to the end of each caption so
    they all have the same length. We don't want these <NULL> tokens to count toward the
    loss or gradient, so in addition to scores and ground-truth labels our loss function
    also accepts a mask array that tells it which elements of the scores count towards
    the loss.

    Since this is very similar to the softmax loss function you implemented in assignment 1,
    we have implemented this loss function for you; look at the temporal_softmax_loss
    function in the file cs231n/rnn_layers.py.

    Run the following cell to sanity check the loss and perform numeric gradient checking
    on the function. You should see an error for dx on the order of e-7 or less.
    """
    N, T, V = 100, 1, 10

    def check_loss(N, T, V, p):
        x = 0.001 * np.random.randn(N, T, V)
        y = np.random.randint(V, size=(N, T))
        mask = np.random.rand(N, T) <= p
        print(temporal_softmax_loss(x, y, mask)[0])

    check_loss(100, 1, 10, 1.0)  # Should be about 2.3
    check_loss(100, 10, 10, 1.0)  # Should be about 23
    check_loss(5000, 10, 10, 0.1)  # Should be about 2.3

    # Gradient check for temporal softmax loss
    N, T, V = 7, 8, 9

    x = np.random.randn(N, T, V)
    y = np.random.randint(V, size=(N, T))
    mask = (np.random.rand(N, T) > 0.5)

    loss, dx = temporal_softmax_loss(x, y, mask, verbose=False)
    dx_num = eval_numerical_gradient(
        lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)

    print('dx error: ', rel_error(dx, dx_num))
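
# The docstring above describes the masked temporal softmax: a softmax over the
# vocabulary at every timestep, negative log-likelihoods summed over time and
# averaged over the minibatch, with <NULL> positions zeroed out by the mask.
# A minimal sketch of such a loss (illustrative only; the graded version lives
# in cs231n/rnn_layers.py as temporal_softmax_loss):
import numpy as np

def temporal_softmax_loss_sketch(x, y, mask):
    """x: scores (N, T, V); y: targets (N, T); mask: bool (N, T)."""
    N, T, V = x.shape
    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)

    # Numerically stable softmax probabilities.
    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)

    # Masked negative log-likelihood, summed over time, averaged over N.
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N

    # Gradient: softmax minus one-hot target, zeroed where the mask is False.
    dx_flat = probs.copy()
    dx_flat[np.arange(N * T), y_flat] -= 1
    dx_flat /= N
    dx_flat *= mask_flat[:, None]
    return loss, dx_flat.reshape(N, T, V)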
Example n. 21
def test_generalized_FullyConnectedNet():
  N, D, H1, H2, C = 2, 15, 20, 30, 10
  X = np.random.randn(N, D)
  y = np.random.randint(C, size=(N,))

  for reg in [0, 3.14]:
    print('Running check with reg = ', reg)
    model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                              reg=reg, weight_scale=5e-2, dtype=np.float64)

    loss, grads = model.loss(X, y)
    print('Initial loss: ', loss)
    assert loss > 0

    for name in sorted(grads):
      f = lambda _: model.loss(X, y)[0]
      grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
      assert_close(grad_num, grads[name])
def Test_SVM():
  num_classes, num_inputs = 10, 50
  x = 0.001 * np.random.randn(num_inputs, num_classes)
  y = np.random.randint(num_classes, size=num_inputs)
  mode = 'cpu'
  
  mp_x = NumpyVarToMinpy(x)
  mp_y = NumpyVarToMinpy(y)
  dx_num = eval_numerical_gradient(lambda x: MinpyVarToNumpy(svm_loss(NumpyVarToMinpy(x), mp_y, mode)[0]), x, verbose=False)
  mp_loss, mp_dx = svm_loss(mp_x, mp_y, mode)
  
  dx = MinpyVarToNumpy(mp_dx)
  loss = MinpyVarToNumpy(mp_loss)

  # Test the svm_loss function. Loss should be around 9 and the dx error around 1e-9.
  print('Testing svm_loss:')
  print('loss: ', loss)
  print('numerical gradient: ', dx_num)
  print('analytical gradient: ', dx)
  # Note: the relative error can be large because the numerical gradient is unstable in GPU mode.
  print('dx error: ', rel_error(dx_num, dx))
Example n. 23
def Test_SVM():
  np.random.seed(31)
  num_classes, num_inputs = 10, 50
  x = 0.001 * np.random.randn(num_inputs, num_classes)
  y = np.random.randint(num_classes, size=num_inputs)
  mode = 'cpu'
  
  mp_x = NumpyVarToMinpy(x)
  mp_y = NumpyVarToMinpy(y)
  dx_num = eval_numerical_gradient(lambda x: MinpyVarToNumpy(svm_loss(NumpyVarToMinpy(x), mp_y, mode)[0]), x, verbose=False)
  mp_loss, mp_dx = svm_loss(mp_x, mp_y, mode)
  
  dx = MinpyVarToNumpy(mp_dx)
  loss = MinpyVarToNumpy(mp_loss)

  # Test the svm_loss function. Loss should be around 9 and the dx error around 1e-9.
  print('Testing svm_loss:')
  print('loss: ', loss)
  # print('numerical gradient: ', dx_num)
  # print('analytical gradient: ', dx)
  # Note: the relative error can be large because the numerical gradient is unstable in GPU mode.
  print('dx error: ', rel_error(dx_num, dx))
Example n. 24
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))


# # Loss layers: Softmax and SVM
# You implemented these loss functions in the last assignment, so we'll give them to you for free here. You should still make sure you understand how they work by looking at the implementations in `cs231n/layers.py`.
# 
# You can make sure that the implementations are correct by running the following:

# In[ ]:

num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
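
# For intuition on the numbers quoted above: with 10 classes and near-zero
# random scores, each of the 9 incorrect classes contributes a margin of about
# 1, so the SVM loss is roughly 9, while the softmax loss is roughly
# log(10) ≈ 2.3. A minimal sketch of the two loss layers, assuming the usual
# (N, C) score layout (the graded versions live in cs231n/layers.py):
import numpy as np

def svm_loss_sketch(x, y):
    """Multiclass SVM loss and gradient for scores x (N, C), labels y (N,)."""
    N = x.shape[0]
    correct = x[np.arange(N), y][:, None]
    margins = np.maximum(0, x - correct + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    dx = (margins > 0).astype(x.dtype)
    dx[np.arange(N), y] -= np.sum(dx, axis=1)
    return loss, dx / N

def softmax_loss_sketch(x, y):
    """Cross-entropy loss and gradient for scores x (N, C), labels y (N,)."""
    N = x.shape[0]
    shifted = x - np.max(x, axis=1, keepdims=True)
    probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    return loss, dx / N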
# Implement the rest of the function. This will compute the gradient of the loss with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you (hopefully!) have a correctly implemented forward pass, you can debug your backward pass using a numeric gradient check:

# In[6]:

from cs231n.gradient_check import eval_numerical_gradient

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.

loss, grads = net.loss(X, y, reg=0.1)

# these should all be less than 1e-8 or so
for param_name in grads:
  f = lambda W: net.loss(X, y, reg=0.1)[0]
  param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
  print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
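
# For reference, the numeric gradient used throughout these checks is a
# centered-difference estimate. A minimal sketch of such a routine (the actual
# helper is eval_numerical_gradient in cs231n/gradient_check.py; details may
# differ):
import numpy as np

def eval_numerical_gradient_sketch(f, x, h=1e-5):
    """Estimate df/dx at x by perturbing one element at a time."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)              # f(x + h)
        x[ix] = old - h
        fxmh = f(x)              # f(x - h)
        x[ix] = old              # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        it.iternext()
    return grad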


# # Train the network
# To train the network we will use stochastic gradient descent (SGD), similar to the SVM and Softmax classifiers. Look at the function `TwoLayerNet.train` and fill in the missing sections to implement the training procedure. This should be very similar to the training procedure you used for the SVM and Softmax classifiers. You will also have to implement `TwoLayerNet.predict`, as the training process periodically performs prediction to keep track of accuracy over time while the network trains.
# 
# Once you have implemented the method, run the code below to train a two-layer network on toy data. You should achieve a training loss less than 0.2.

# In[7]:

net = init_toy_model()
stats = net.train(X, y, X, y,
            learning_rate=1e-1, reg=1e-5,
            num_iters=100, verbose=False)
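
# The training procedure described above reduces to: sample a minibatch, run a
# forward/backward pass to get the loss and gradients, and take an SGD step on
# every parameter. A minimal sketch under the assumption of a model exposing
# loss(X, y) -> (loss, grads) and a params dict (names are illustrative, not
# the exact TwoLayerNet.train API):
import numpy as np

def train_sgd_sketch(model, X, y, learning_rate=1e-1, batch_size=200,
                     num_iters=100):
    """Plain minibatch SGD loop; returns the loss history."""
    N = X.shape[0]
    loss_history = []
    for _ in range(num_iters):
        idx = np.random.choice(N, min(batch_size, N), replace=True)
        loss, grads = model.loss(X[idx], y[idx])
        loss_history.append(loss)
        for name in model.params:
            model.params[name] -= learning_rate * grads[name]
    return loss_history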

check_loss(100, 1, 10, 1.0)  # Should be about 2.3
check_loss(100, 10, 10, 1.0)  # Should be about 23
check_loss(5000, 10, 10, 0.1)  # Should be about 2.3

# Gradient check for temporal softmax loss
N, T, V = 7, 8, 9

x = np.random.randn(N, T, V)
y = np.random.randint(V, size=(N, T))
mask = (np.random.rand(N, T) > 0.5)

loss, dx = temporal_softmax_loss(x, y, mask, verbose=False)

dx_num = eval_numerical_gradient(
    lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)

print('dx error: ', rel_error(dx, dx_num))

#RNN for image captioning
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='rnn',
                      dtype=np.float64)
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                 x,
                                 verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
Example n. 28
dx_num = eval_numerical_gradient_array(
    lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0],
                                 x,
                                 verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print('\nTesting relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))

num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be 1e-9
print('\nTesting svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
# should be very small, we get 5e-12
print('Difference between your loss and correct loss:')
print(np.sum(np.abs(loss - correct_loss)))

from cs231n.gradient_check import eval_numerical_gradient

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.

loss, grads = two_layer_net(X, model, y, reg)

# these should all be less than 1e-8 or so
for param_name in grads:
  param_grad_num = eval_numerical_gradient(lambda W: two_layer_net(X, model, y, reg)[0], model[param_name], verbose=False)
  print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# call the trainer to optimize the loss
# Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data)
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                             model, two_layer_net,
                                             reg=0.001,
                                             learning_rate=1e-1, momentum=0.0, learning_rate_decay=1,
                                             update='sgd', sample_batches=False,
                                             num_epochs=100,
                                             verbose=False)
Example n. 31
print(np.sum(np.abs(loss - correct_loss)))


# Backward pass
from cs231n.gradient_check import eval_numerical_gradient

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.

loss, grads = net.loss(X, y, reg=0.05)

# these should all be less than 1e-8 or so
for param_name in grads:
    f = lambda W: net.loss(X, y, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

# Train the network
net = init_toy_model()
stats = net.train(X, y, X, y,
            learning_rate=1e-1, reg=5e-6,
            num_iters=100, verbose=False)

print('Final training loss: ', stats['loss_history'][-1])

# plot the loss history
plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.title('Training Loss history')
def TestNeuralNet():
    net = init_toy_model()
    X, y = init_toy_data()

    scores = net.loss(X)
    print('Your scores:')
    print(scores)
    print('correct scores:')
    correct_scores = np.asarray([[-0.81233741, -1.27654624, -0.70335995],
                                 [-0.17129677, -1.18803311, -0.47310444],
                                 [-0.51590475, -1.01354314, -0.8504215],
                                 [-0.15419291, -0.48629638, -0.52901952],
                                 [-0.00618733, -0.12435261, -0.15226949]])
    print(correct_scores)

    # The difference should be very small. We get < 1e-7
    print('Difference between your scores and correct scores:')
    print(np.sum(np.abs(scores - correct_scores)))

    loss, _ = net.loss(X, y, reg=0.05)
    correct_loss = 1.30378789133

    # should be very small, we get < 1e-12
    print('Difference between your loss and correct loss:')
    print(np.sum(np.abs(loss - correct_loss)))
    # Use numeric gradient checking to check your implementation of the backward pass.
    # If your implementation is correct, the difference between the numeric and
    # analytic gradients should be less than 1e-8 for each of W1, W2, b1, and
    # b2.

    loss, grads = net.loss(X, y, reg=0.05)

    # these should all be less than 1e-8 or so
    for param_name in grads:

        def f(W):
            return net.loss(X, y, reg=0.05)[0]

        param_grad_num = eval_numerical_gradient(f,
                                                 net.params[param_name],
                                                 verbose=False)
        print('%s max relative error: %e' %
              (param_name, rel_error(param_grad_num, grads[param_name])))

    net = init_toy_model()
    stats = net.train(X,
                      y,
                      X,
                      y,
                      learning_rate=1e-1,
                      reg=5e-6,
                      num_iters=100,
                      verbose=False)

    print('Final training loss: ', stats['loss_history'][-1])

    # plot the loss history
    plt.plot(stats['loss_history'])
    plt.xlabel('iteration')
    plt.ylabel('training loss')
    plt.title('Training Loss history')
    plt.show()
Example n. 33
    print(temporal_softmax_loss(x, y, mask)[0])
  
check_loss(100, 1, 10, 1.0)   # Should be about 2.3
check_loss(100, 10, 10, 1.0)  # Should be about 23
check_loss(5000, 10, 10, 0.1) # Should be about 2.3

# Gradient check for temporal softmax loss
N, T, V = 7, 8, 9

x = np.random.randn(N, T, V)
y = np.random.randint(V, size=(N, T))
mask = (np.random.rand(N, T) > 0.5)

loss, dx = temporal_softmax_loss(x, y, mask, verbose=False)

dx_num = eval_numerical_gradient(lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)

print('dx error: ', rel_error(dx, dx_num))


# # RNN for image captioning
# Now that you have implemented the necessary layers, you can combine them to build an image captioning model. Open the file `cs231n/classifiers/rnn.py` and look at the `CaptioningRNN` class.
# 
# Implement the forward and backward pass of the model in the `loss` function. For now you only need to implement the case where `cell_type='rnn'` for vanilla RNNs; you will implement the LSTM case later. After doing so, run the following to check your forward pass using a small test case; you should see an error less than `1e-10`.

# In[ ]:

N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13
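
# At the heart of the cell_type='rnn' case mentioned above is a single vanilla
# recurrent step, next_h = tanh(x @ Wx + prev_h @ Wh + b). A minimal sketch of
# the step and its backward pass (illustrative; the assignment's versions live
# in cs231n/rnn_layers.py):
import numpy as np

def rnn_step_forward_sketch(x, prev_h, Wx, Wh, b):
    """One vanilla-RNN step for a minibatch: (N, D), (N, H) -> (N, H)."""
    next_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
    cache = (x, prev_h, Wx, Wh, next_h)
    return next_h, cache

def rnn_step_backward_sketch(dnext_h, cache):
    """Backprop through one step, using d tanh(a)/da = 1 - tanh(a)**2."""
    x, prev_h, Wx, Wh, next_h = cache
    da = dnext_h * (1 - next_h ** 2)
    dx = da.dot(Wx.T)
    dprev_h = da.dot(Wh.T)
    dWx = x.T.dot(da)
    dWh = prev_h.T.dot(da)
    db = da.sum(axis=0)
    return dx, dprev_h, dWx, dWh, db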
Example n. 34
# print 'Sanity check loss (with regularization): ', loss

##########Gradient check
##########for super convnet
num_inputs = 2
input_shape = (3, 16, 16)
reg = 0.0
num_classes = 10
X = np.random.randn(num_inputs, *input_shape)
y = np.random.randint(num_classes, size=num_inputs)

model = init_super_convnet(num_filters=3, filter_size=3, input_shape=input_shape)
loss, grads = super_convnet(X, model, y)
for param_name in sorted(grads):
    f = lambda _: super_convnet(X, model, y)[0]
    param_grad_num = eval_numerical_gradient(f, model[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print('%s max relative error: %e' % (param_name, e))
##############for three_layer##########
# num_inputs = 2
# input_shape = (3, 16, 16)
# reg = 0.0
# num_classes = 10
# X = np.random.randn(num_inputs, *input_shape)
# y = np.random.randint(num_classes, size=num_inputs)

# model = init_three_layer_convnet(num_filters=3, filter_size=3, input_shape=input_shape)
# loss, grads = three_layer_convnet(X, model, y)

# for param_name in sorted(grads):
#     f = lambda _: three_layer_convnet(X, model, y)[0]

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()
for k, v in data.items():
  print('%s: ' % k, v.shape)

N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for reg in [0, 3.14]:
  print('Running check with reg = ', reg)
  model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                            reg=reg, weight_scale=5e-2, dtype=np.float64,
                            use_batchnorm=True)

  loss, grads = model.loss(X, y)
  print('Initial loss: ', loss)

  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))
  if reg == 0: print()
Example n. 36
def toy_data():
    # Create a small net and some toy data
    input_size = 4
    hidden_size = 10
    num_classes = 3
    num_inputs = 5

    net = _init_toy_model(input_size, hidden_size, num_classes)
    X, y = _init_toy_data(num_inputs, input_size)

    # Forward pass: compute scores
    scores = net.loss(X)
    print('Your scores:')
    print(scores)
    print()
    correct_scores = np.asarray([[-0.81233741, -1.27654624, -0.70335995],
                                 [-0.17129677, -1.18803311, -0.47310444],
                                 [-0.51590475, -1.01354314, -0.8504215],
                                 [-0.15419291, -0.48629638, -0.52901952],
                                 [-0.00618733, -0.12435261, -0.15226949]])
    print('Correct scores:')
    print(correct_scores)

    print('Difference between your scores and correct scores:')
    print(np.sum(np.abs(scores - correct_scores)))

    # Forward pass: compute loss
    loss, _ = net.loss(X, y, reg=0.1)
    correct_loss = 1.30378789133

    print("Difference between your loss and correct loss.")
    print(np.sum(np.abs(loss - correct_loss)))

    # Backward pass
    loss, grads = net.loss(X, y, reg=0.1)
    for param_name in grads:
        f = lambda W: net.loss(X, y, reg=0.1)[0]
        param_grad_num = eval_numerical_gradient(f,
                                                 net.params[param_name],
                                                 verbose=False)
        print('{} max relative error: {}'.format(
            param_name, rel_error(param_grad_num, grads[param_name])))

    # Train the network
    net = _init_toy_model(input_size, hidden_size, num_classes)
    stats = net.train(X,
                      y,
                      X,
                      y,
                      learning_rate=1e-1,
                      reg=1e-5,
                      num_iters=100,
                      verbose=False)

    print('Final training loss: {}'.format(stats['loss_history'][-1]))

    plt.plot(stats['loss_history'])
    plt.xlabel('iteration')
    plt.ylabel('training loss')
    plt.title('Training Loss history')
    plt.show()
Example n. 37
#You should expect to see relative errors between 1e-13 and 1e-8
print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))
"""
#################################################################################################################################
print("\n**********************Batch Normalization:FC_NET***********************************************\n")
#################################################################################################################################

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

# You should expect relative errors between 1e-4 and 1e-10 for W,
# between 1e-08 and 1e-10 for b,
# and between 1e-08 and 1e-09 for beta and gamma.
for reg in [0, 3.14]:
  print('Running check with reg = ', reg)
  model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                            reg=reg, weight_scale=5e-2, dtype=np.float64,
                            normalization='batchnorm')
  loss, grads = model.loss(X, y)
  print('Initial loss: ', loss)

  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))
  if reg == 0: print()
# # Backward pass
# Implement the rest of the function. This will compute the gradient of the loss with respect to the variables `W1`, `b1`, `W2`, and `b2`. Now that you (hopefully!) have a correctly implemented forward pass, you can debug your backward pass using a numeric gradient check:

from cs231n.gradient_check import eval_numerical_gradient

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.

loss, grads = two_layer_net(X, model, y, reg)
#print('grads of W2',grads['W2'])

# these should all be less than 1e-8 or so
for param_name in grads:
    param_grad_num = eval_numerical_gradient(
        lambda W: two_layer_net(X, model, y, reg)[0],
        model[param_name],
        verbose=False)
    print('%s max relative error: %e' %
          (param_name, rel_error(param_grad_num, grads[param_name])))

###############################################################################

# # Train the network
# To train the network we will use SGD with Momentum.
# Open the file `classifier_trainer.py` and familiarize yourself with the `ClassifierTrainer`
# class. It performs optimization given an arbitrary cost function, data, and model.
# By default it uses vanilla SGD, which you need to implement.
# First, run the optimization below using vanilla SGD:

from cs231n.classifier_trainer import ClassifierTrainer
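
# The comment above notes that ClassifierTrainer defaults to vanilla SGD, which
# the assignment asks you to implement. The update itself is just a step
# against the gradient; a minimal, illustrative sketch:

def sgd_update(w, dw, learning_rate=1e-2):
    """Vanilla stochastic gradient descent: w <- w - learning_rate * dw."""
    w -= learning_rate * dw
    return w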