# To check that you have implemented the gradient correctly, you can numerically estimate the gradient of the loss function and compare the numeric estimate to the gradient that you computed. We have provided code that does this for you:

# In[12]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_train, y_train, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
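
# For reference, a minimal sketch of what a sparse numerical gradient check does,
# assuming centered differences; the provided cs231n.gradient_check.grad_check_sparse
# is the authoritative version and may differ in details.
import numpy as np

def sparse_grad_check_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        ix = tuple(np.random.randint(d) for d in x.shape)  # pick a random coordinate
        old_val = x[ix]
        x[ix] = old_val + h
        fxph = f(x)                                        # f(x + h)
        x[ix] = old_val - h
        fxmh = f(x)                                        # f(x - h)
        x[ix] = old_val                                    # restore the original value
        grad_numeric = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numeric - grad_analytic) /
                     (abs(grad_numeric) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numeric, grad_analytic, rel_error))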


# ### Inline Question 1:
# It is possible that once in a while a dimension in the gradcheck will not match exactly.
# What could such a discrepancy be caused by? Is it a reason for concern?
# What is a simple example in one dimension where a gradient check could fail?
# *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*
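
# A 1-D illustration of the hint above (an added sketch, not the expected answer):
# f(x) = max(0, x) has a kink at x = 0, so a centered difference that straddles
# the kink disagrees with the analytic (sub)gradient.
import numpy as np
f_kink = lambda x: np.maximum(0.0, x)
x0, h = 1e-6, 1e-5                                         # point just right of the kink
numeric = (f_kink(x0 + h) - f_kink(x0 - h)) / (2 * h)      # the difference crosses x = 0
analytic = 1.0                                             # derivative for x > 0
print('numeric: %f analytic: %f' % (numeric, analytic))    # ~0.55 vs 1.0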

# In[ ]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
## -------- Example 2 ---------
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))
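
# For context, a hedged sketch of what a naive (loop-based) SVM loss and gradient
# look like, assuming W: (D, C), X: (N, D), delta = 1 and L2 regularization;
# the assignment's svm_loss_naive is the function actually being checked here.
import numpy as np

def svm_loss_naive_sketch(W, X, y, reg):
    dW = np.zeros(W.shape)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1   # delta = 1
            if margin > 0:
                loss += margin
                dW[:, j] += X[i]                           # wrong-class column
                dW[:, y[i]] -= X[i]                        # correct-class column
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW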

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
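
# Reminder (a hedged note, assuming the usual L2 regularization term reg * np.sum(W * W)):
# the regularization part of the loss contributes 2 * reg * W to the gradient, i.e.
#   loss += reg * np.sum(W * W)
#   dW   += 2 * reg * W
# Forgetting the dW line is exactly what the check with regularization above is meant to catch.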

## -------- vectorized ---------
# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))
tic = time.time()
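
# A hedged sketch of the loss-only vectorized hinge loss requested above, using
# the same conventions as the naive version (W: (D, C), X: (N, D), delta = 1):
import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)                                            # (N, C)
    correct = scores[np.arange(num_train), y][:, np.newaxis]     # (N, 1)
    margins = np.maximum(0, scores - correct + 1)                # hinge margins
    margins[np.arange(num_train), y] = 0                         # drop the correct class
    loss = margins.sum() / num_train + reg * np.sum(W * W)
    return loss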
## -------- Example 3 ---------
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)
print('\n')
''' Generate weight matrix and conduct softmax loss computation using naive version '''
W = np.random.randn(3073, 10) * 1e-4
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
# Since W is initialized to very small values, loss should come out to ~(-log(0.1))
print(f"naive loss computation: {loss} -log(0.1): {-np.log(0.1)}")
# Check gradient calculation for accuracy
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_check_sparse(f, W, grad, 10)
# Another check, with regularization this time
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.000005)[0]
grad_check_sparse(f, W, grad_naive, 10)
''' Repeat for vectorized implementation and compare '''
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_dev, y_dev, 0.000005)
print(f'vectorized loss: {loss_vectorized} -log(0.1): {-np.log(0.1)}')

# Use the Frobenius norm to compare the two versions of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: ', np.abs(loss_naive - loss_vectorized))
print('Gradient difference: ', grad_difference)
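
# A hedged sketch of a vectorized softmax loss and gradient with the same conventions
# (W: (D, C), X: (N, D)); the assignment's softmax_loss_vectorized is what is compared above:
def softmax_loss_vectorized_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)                                     # (N, C)
    scores = scores - scores.max(axis=1, keepdims=True)   # numerical stability
    probs = np.exp(scores)
    probs /= probs.sum(axis=1, keepdims=True)             # softmax probabilities
    loss = -np.log(probs[np.arange(num_train), y]).mean() + reg * np.sum(W * W)
    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1                 # dL/dscores
    dW = X.T.dot(dscores) / num_train + 2 * reg * W
    return loss, dW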
''' Use the validation set to tune hyperparams -
        regularization strength and learning rate '''
## -------- Example 4 ---------
loss_n, grad_n = softmax_loss_naive(X_dev, y_dev, W_dev, 0.005)
print('loss_n: %f' % (loss_n, ))
print('grad: {0}'.format(grad_n[:2, :5]))
loss_v, grad_v = softmax_loss_vectorized(X_dev, y_dev, W_dev, 0.005)
print('loss_v: %f' % (loss_v, ))
print('grad: {0}'.format(grad_v[:2, :5]))

# In[ ]:

from cs231n.gradient_check import grad_check_sparse

# In[68]:

f = lambda w: softmax_loss_naive(X_dev, y_dev, w, 0.0)[0]
grad_numerical = grad_check_sparse(f, W_dev, grad_n)

# In[65]:

f = lambda w: softmax_loss_vectorized(X_dev, y_dev, w, 0.0)[0]
grad_numerical = grad_check_sparse(f, W_dev, grad_v)

# In[70]:

import time
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(X_dev, y_dev, W_dev, 0.005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
## -------- Example 5 ---------
X_test = X_test[mask]
y_test = y_test[mask]

# Train the model
X_train = np.reshape(X_train, (X_train.shape[0], -1))  # flatten each image into a 1-D row
X_train = np.hstack([X_train, np.ones([X_train.shape[0], 1])])
X_test = np.reshape(X_test, (X_test.shape[0], -1))  # flatten each image into a 1-D row
X_test = np.hstack([X_test, np.ones([X_test.shape[0], 1])])
num_class = 10
W = np.random.randn(X_train.shape[1], num_class) * 0.001
# Compare the numerical gradient with the analytic gradient
from cs231n.classifiers import softmax_loss_naive, softmax_loss_vectorized
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.5)
from cs231n.gradient_check import grad_check_sparse
f = lambda w: softmax_loss_vectorized(w, X_train, y_train, 0.5)[0]
grad_check_sparse(f, W, grad)
from cs231n.classifiers import Softmax
classifier = Softmax()
loss_hist = classifier.train(X_train,
                             y_train,
                             verbose=True,
                             num_iters=5000,
                             batch_size=100)
plt.plot(loss_hist)
plt.xlabel('Step')
plt.ylabel('Loss')
plt.show()
# Generalization: accuracy on the test set
y_pred = classifier.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print("Test accuracy:", accuracy)
## -------- Example 6 ---------
def main():
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

    num_training = 49000
    num_dev = 500
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    # Generate a random softmax weight matrix and use it to compute the loss.
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

    # As a rough sanity check, our loss should be something close to -log(0.1).
    # Since the weight matrix W is initialized with small random values, the predicted
    # probability of each class is roughly uniform, about 1/10, where 10 is the number of classes.
    print('loss: %f' % loss)
    print('sanity check: %f' % (-np.log(0.1)))

    f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    # similar to SVM case, do another gradient check with regularization
    loss, grad = softmax_loss_naive(W, X_dev, y_dev, 1e2)
    f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 1e2)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    # implement a vectorized version in softmax_loss_vectorized.
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: {} computed in {}'.format(loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, X_dev, y_dev, 0.00001)
    toc = time.time()
    print('Vectorized loss: {} computed in {}'.format(loss_vectorized, toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)

    # Use the validation set to tune hyperparameters (regularization strength and
    # learning rate). You should experiment with different ranges for the learning
    # rates and regularization strengths; if you are careful you should be able to
    # get a classification accuracy of over 0.35 on the validation set.
    results = {}
    best_val = -1
    best_softmax = None
    learning_rates = [1e-7, 2e-7, 5e-7]
    #regularization_strengths = [5e4, 1e8]
    regularization_strengths = ([(1 + 0.1 * i) * 1e4 for i in range(-3, 4)] +
                                [(5 + 0.1 * i) * 1e4 for i in range(-3, 4)])

    for lr in learning_rates:
        for rs in regularization_strengths:
            print('Training Softmax with rs {} and lr {}'.format(rs, lr))
            softmax = Softmax()
            softmax.train(X_train, y_train, lr, rs, num_iters=2000)
            y_train_pred = softmax.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = softmax.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)
            if val_accuracy > best_val:
                best_val = val_accuracy
                best_softmax = softmax
            results[(lr, rs)] = train_accuracy, val_accuracy

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' %
              (lr, reg, train_accuracy, val_accuracy))

    print('best validation accuracy achieved during cross-validation: %f' %
          best_val)

    # Evaluate the best softmax on test set
    y_test_pred = best_softmax.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('softmax on raw pixels final test set accuracy: %f' %
          (test_accuracy, ))

    # Visualize the learned weights for each class
    w = best_softmax.W[:-1, :]  # strip out the bias
    w = w.reshape(32, 32, 3, 10)

    w_min, w_max = np.min(w), np.max(w)

    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)

        # Rescale the weights to be between 0 and 255
        wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])