def test_softmax_loss_vectorized_numerical_check(sample_train, train_count, reg=0.0, check_count=20):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)
    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[1]
    # Central difference: (f(W + h*e_ix) - f(W - h*e_ix)) / (2h) approximates dL/dW[ix],
    # which should agree with the analytic gradient returned by softmax_loss_vectorized.
    num_checks = check_count
    grad_analytic = g(W)
    for i in range(num_checks):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0001)
def test_softmax_loss_vectorized_comparison_mean(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain_mean_removed = Xtrain - mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    Xtrain_mean_removed = np.hstack(
        [Xtrain_mean_removed, np.ones((Xtrain_mean_removed.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_mean_removed, grad_mean_removed = softmax_loss_vectorized(
        W, Xtrain_mean_removed, ytrain, reg)
    # assert np.abs(loss - loss_mean_removed) > 0.01
    assert np.linalg.norm(grad - grad_mean_removed) > 1.0
def softmax_test_vectorized(x, y):
    # Generate a small random softmax weight matrix: standard normal, scaled by 0.0001.
    W = np.random.randn(3073, 10) * 0.0001
    # Compute the softmax classifier loss and the gradient w.r.t. W (no regularization).
    loss, gradient = softmax_loss_vectorized(W, x, y, 0.0)
    # Randomly pick a few dimensions of W and compare the numerical gradient against
    # the analytic gradient to verify correctness; the random sampling happens inside
    # grad_check_sparse. Define a lambda that returns only the loss value.
    f = lambda w: softmax_loss_vectorized(w, x, y, 0.0)[0]
    grad_check_sparse(f, W, gradient)

    print('turn on regularization')
    # Compute the softmax classifier loss and gradient with regularization.
    loss, gradient = softmax_loss_vectorized(W, x, y, 5e1)
    # Again compare numerical and analytic gradients on a few random dimensions of W.
    f = lambda w: softmax_loss_vectorized(w, x, y, 5e1)[0]
    grad_check_sparse(f, W, gradient)
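# For reference, a sparse numerical gradient check in the spirit of grad_check_sparse
# can be sketched as below. This is an illustrative stand-in, not the course utility:
# its name, signature, step size h, and printed output are assumptions.
import random
import numpy as np

def sparse_grad_check(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Compare analytic_grad to a central-difference estimate at a few random entries of x."""
    for _ in range(num_checks):
        ix = tuple(random.randrange(m) for m in x.shape)
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)            # f evaluated with x[ix] nudged up
        x[ix] = old - h
        fxmh = f(x)            # f evaluated with x[ix] nudged down
        x[ix] = old            # restore the original value
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))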
def test_softmax_loss_vectorized_numerical_gradient(sample_train, train_count, reg=0.0):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)
    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[1]
    grad_analytic = g(W)
    param_grad_num = eval_numerical_gradient(f, W, verbose=False, h=1e-7)
    assert rel_error(param_grad_num, grad_analytic) < 1e-4
def Softmax(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    '''
    loss, grad = softmax_loss_naive(W, train_data, train_label, 0.000005)
    print('loss: %f \n' % loss)
    print('sanity check: %f' % (-np.log(0.1)))

    def f(w):
        return softmax_loss_naive(w, train_data, train_label, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    loss, grad = softmax_loss_naive(W, train_data, train_label, 5e1)

    def f(w):
        return softmax_loss_naive(w, train_data, train_label, 5e1)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)
    '''
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)
def test_softmax_loss_naive_vectorized_comparison(sample_train_with_bias, train_count):
    Xtrain, ytrain = sample_train_with_bias(count=train_count)
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
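# For context, the quantity these tests compare can be sketched as a plain vectorized
# softmax loss with L2 regularization. This is only an illustrative reference, not the
# graded softmax_loss_vectorized; the actual implementation may use a different
# regularization convention (e.g. a 0.5 factor) or weight layout.
import numpy as np

def softmax_loss_reference(W, X, y, reg):
    """Softmax loss and gradient. X: (N, D), W: (D, C), y: (N,) integer labels."""
    N = X.shape[0]
    scores = X.dot(W)                                # (N, C) class scores
    scores -= scores.max(axis=1, keepdims=True)      # shift for numerical stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    loss = -np.log(probs[np.arange(N), y]).mean() + reg * np.sum(W * W)
    dscores = probs
    dscores[np.arange(N), y] -= 1                    # p - 1 at the correct class
    dW = X.T.dot(dscores) / N + 2 * reg * W
    return loss, dW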
def test_softmax_loss_vectorized_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
def test_softmax_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
def run_softmax_naive(X_train, y_train):
    # Generate a random softmax weight matrix and use it to compute the loss.
    W = np.random.randn(10, 3073) * 0.0001
    start = time.time()
    loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)
    end = time.time()
    print("softmax_loss_naive: %f s" % (end - start))
    # As a rough sanity check, our loss should be something close to -log(0.1).
    print('loss: %f' % loss)
    print('sanity check: %f' % (-np.log(0.1)))
def test_softmax_loss_vectorized_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # Using the incorrect W size (no bias row).
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    # Add the bias dimension (to X).
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_dev, y_dev, 0.00001)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
from cs231n.classifiers.linear_classifier import Softmax
def loss(self, X_batch, y_batch, reg):
    return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
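# This loss method is what a linear classifier's training loop calls on each minibatch.
# A minimal SGD sketch is shown below; it assumes a classifier object with a W attribute
# of shape matching the loss above, and the hyperparameter values are illustrative only.
# The actual train method in the course code likely adds more bookkeeping (verbose
# printing, W initialization, etc.).
import numpy as np

def sgd_train(clf, X, y, learning_rate=1e-7, reg=2.5e4, num_iters=1500, batch_size=200):
    """Plain minibatch SGD using clf.loss, which returns (loss, dW)."""
    num_train = X.shape[0]
    loss_history = []
    for it in range(num_iters):
        idx = np.random.choice(num_train, batch_size, replace=True)
        X_batch, y_batch = X[idx], y[idx]
        loss, grad = clf.loss(X_batch, y_batch, reg)
        loss_history.append(loss)
        clf.W -= learning_rate * grad   # gradient descent step
    return loss_history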
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

#%%
# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_dev, y_dev, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
from cs231n.gradient_check import grad_check_sparse
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
# yTrain = np.reshape(yTrain[:lengthTrain])
yTrain = yTrain[:lengthTrain]
xTest = np.reshape(xTest[:lengthTest], (lengthTest, -1))
# yTest = np.reshape(yTest[:lengthTest])
yTest = yTest[:lengthTest]

xTrain = (xTrain - np.mean(xTrain, axis=0)) / (np.std(xTrain, axis=0))

W = np.random.randn(xTrain.shape[1], 10) * 0.001
loss, grad = softmax.softmax_loss_naive(W, xTrain, yTrain, 100)
# exit()
f = lambda w: softmax.softmax_loss_naive(w, xTrain, yTrain, 0)[0]
# grad_numerical = gradient_check.grad_check_sparse(f, W, grad, 10)

loss_naive, grad_naive = softmax.softmax_loss_naive(W, xTrain, yTrain, 0)
loss_vectorized, grad_vectorized = softmax.softmax_loss_vectorized(W, xTrain, yTrain, 0)
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print(loss_naive)
print(loss_vectorized)
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)
# exit()

learning_rate = 3e-4
regStrengths = range(0, 200, 50)
cvFold = 5
bestAccuracy = -1
bestReg = -1
# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(10, 3073) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_train, y_train, 0.00001)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
# A sketch of such a grid search follows below.
from cs231n.classifiers.linear_classifier import Softmax
results = {}
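# The comment above describes the tuning step without showing it. A minimal grid-search
# sketch under the usual assumptions (a Softmax classifier with train/predict methods,
# X_train/y_train and X_val/y_val already prepared) looks like this; the specific
# learning rates and regularization strengths are illustrative, not prescribed values.
learning_rates = [1e-7, 5e-7]
regularization_strengths = [2.5e4, 5e4]
best_val = -1
best_softmax = None

for lr in learning_rates:
    for reg in regularization_strengths:
        clf = Softmax()
        clf.train(X_train, y_train, learning_rate=lr, reg=reg,
                  num_iters=1500, verbose=False)
        train_acc = np.mean(clf.predict(X_train) == y_train)
        val_acc = np.mean(clf.predict(X_val) == y_val)
        results[(lr, reg)] = (train_acc, val_acc)
        if val_acc > best_val:
            best_val, best_softmax = val_acc, clf
        print('lr %e reg %e train accuracy: %f val accuracy: %f'
              % (lr, reg, train_acc, val_acc))

print('best validation accuracy achieved during cross-validation: %f' % best_val)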
# In[ ]:

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# In[ ]:

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
import h5py
from cs231n.classifiers import Softmax
from numpy import loadtxt
import numpy as np

h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)

# X_train = np.zeros((27116,196608))
# y_train = np.zeros(27116)
# X_val = np.zeros((5000,196608))
# y_val = np.zeros(5000)
X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(196608, 5) * 0.0001
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_train, y_train, 0.00001)

softmax = Softmax()
loss_hist = softmax.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                          num_iters=1500, verbose=False)
y_train_pred = softmax.predict(X_train)
training_accuracy = np.mean(y_train == y_train_pred)
y_val_pred = softmax.predict(X_val)
val_accuracy = np.mean(y_val == y_val_pred)
print('training accuracy: %f' % training_accuracy)
print('validation accuracy: %f' % val_accuracy)
# Cleaning up variables to prevent loading data multiple times (which may cause memory issues).
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except:
    pass

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

from cs231n.classifiers.softmax import softmax_loss_vectorized
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))
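# Why -log(0.1): with tiny random weights the scores are roughly equal across the
# 10 CIFAR-10 classes, so the softmax assigns probability about 1/10 to the correct
# class and the expected initial cross-entropy loss is about -log(1/10) ~= 2.3026.
# The same sanity check generalizes to any class count:
import numpy as np

num_classes = 10
expected_initial_loss = -np.log(1.0 / num_classes)
print('expected initial loss: %f' % expected_initial_loss)  # ~2.302585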
# In[ ]:

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.
from cs231n.classifiers.softmax import softmax_loss_naive
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss, toc - tic))

from cs231n.gradient_check import grad_check_sparse
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 0.00001)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

"""
################### I don't want to implement the naive version! BY BINGO
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(10, 3073) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
f = lambda w: softmax_loss_vectorized(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()