def test_SVM_grad_vectorized_comparison_sparse(sample_train, check_count):
    Xtrain, ytrain = sample_train(count=500)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 0.0)
    f = lambda w: svm_loss_vectorized(w, Xtrain, ytrain, 0.0)[0]
    g = lambda w: svm_loss_vectorized(w, Xtrain, ytrain, 0.0)[1]
    # Centered difference: (f(W + h*e_ix) - f(W - h*e_ix)) / (2h) approximates
    # the analytic partial derivative dL/dW[ix] returned by the vectorized code.
    num_checks = check_count
    grad_analytic = g(W)
    for i in range(num_checks):
        ix = tuple(random.randrange(m) for m in W.shape)
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        rel_error = (abs(grad_numerical - grad_analytic[ix])
                     / (abs(grad_numerical) + abs(grad_analytic[ix])))
        assert rel_error < 0.0001

def test_SVM_loss_vectorized_comparison_mean(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain_mean_removed = Xtrain - mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    Xtrain_mean_removed = np.hstack(
        [Xtrain_mean_removed, np.ones((Xtrain_mean_removed.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_mean_removed, grad_mean_removed = svm_loss_vectorized(
        W, Xtrain_mean_removed, ytrain, reg)
    #assert np.abs(loss - loss_mean_removed) > 0.01
    assert np.linalg.norm(grad - grad_mean_removed) > 1.0

def svm_test_vectorized(x, y):
    # Generate a small random SVM weight matrix: standard normal, scaled by 0.0001.
    W = np.random.randn(3073, 10) * 0.0001
    # Compute the SVM loss and the gradient w.r.t. W (no regularization).
    loss, gradient = svm_loss_vectorized(W, x, y, 0.0)
    # Pick a few random dimensions of W and compare the numerical gradient against
    # the analytic one; the random sampling happens inside grad_check_sparse.
    # Lambda that returns only the loss value.
    f = lambda w: svm_loss_vectorized(w, x, y, 0.0)[0]
    grad_check_sparse(f, W, gradient)
    print('turn on regularization')
    # Compute the SVM loss and gradient again, now with regularization.
    loss, gradient = svm_loss_vectorized(W, x, y, 5e1)
    # Same sparse numerical-vs-analytic gradient comparison as above.
    f = lambda w: svm_loss_vectorized(w, x, y, 5e1)[0]
    grad_check_sparse(f, W, gradient)

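Several snippets in this collection call grad_check_sparse without showing it. Below is a minimal sketch of such a helper, assuming the cs231n-style signature grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5); the actual helper lives in cs231n/gradient_check.py and may differ in detail.

import random
import numpy as np

def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    # Compare the analytic gradient to a centered-difference estimate at a few
    # randomly chosen coordinates of x.
    for _ in range(num_checks):
        ix = tuple(random.randrange(m) for m in x.shape)
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)          # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)          # f(x - h)
        x[ix] = oldval       # restore the perturbed entry
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic)
                     / (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))
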
def test_SVM_loss_vectorized_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # W expects a bias column that Xtrain does not have.
    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)

def test_SVM_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001

def test_SVM_loss_naive_vectorized_comparison(sample_train, train_count):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001

def test_SVM_loss_vectorized_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # Use the incorrect W size (missing the bias row).
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    # Add the bias dimension to X only.
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    with pytest.raises(ValueError):
        loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)

def main():
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
    # Generate a random SVM weight matrix of small numbers.
    W = np.random.randn(10, 3073) * 0.01

    start = time.perf_counter()
    loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.perf_counter()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss,))

    start = time.perf_counter()
    loss1, grad = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.perf_counter()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1,))

    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
                          num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc: {}  Validation: {}  Test: {}'.format(acc_train, acc_val, acc_test))
    toc = time.time()
    print('That took %fs' % (toc - tic))

X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

''' Evaluate naive implementation of loss '''
# Generate a random SVM weight matrix of small numbers.
W = np.random.randn(3073, 10) * 0.0001  # 3073 x 10 (10 classes)
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0)

# # Numerically compute the gradient along several randomly chosen dimensions
# # and compare with the analytically computed gradient (grad).
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]  # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
# # Again with the regularization turned on
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]  # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)

''' Evaluate vectorized implementation of loss '''
loss_v, grad_v = svm_loss_vectorized(W, X_dev, y_dev, 0)
print("Gradient difference", np.linalg.norm(grad - grad_v))
print("Loss difference", loss - loss_v)

''' Implement Stochastic Gradient Descent to minimize loss '''
svm = LinearSVM()
tic = time.time()
# Train and keep the loss history for visualization.
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print("Time", (toc - tic))
plt.plot(loss_hist)

print(correct[:, :2])
diff -= correct
# print(diff)
dW = diff.dot(X.T) / float(num_train)
# print(dW)
loss1, grad = svm_loss_naive(W, X, y, 1)
# print(grad)
loss2, grad2 = svm_loss_vectorized(W, X, y, 1)
# print(grad2)
print(loss)
print(loss1)
print(loss2)
differ = np.linalg.norm(grad - grad2, ord='fro')
print('difference: %f' % differ)
differ1 = np.linalg.norm(dW - grad, ord='fro')
print('difference: %f' % differ1)
differ2 = np.linalg.norm(dW - grad2, ord='fro')
print('difference: %f' % differ2)

# Generate a random SVM weight matrix of small numbers.
W = np.random.randn(196608, 5) * 0.0001

h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)

#X_train = np.zeros((27116, 196608))
#y_train = np.zeros(27116)
#X_val = np.zeros((5000, 196608))
#y_val = np.zeros(5000)
X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]

loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_train, y_train, 0.00001)

svm = LinearSVM()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred),))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred),))

def loss(self, X_batch, y_batch, reg):
    return svm_loss_vectorized(self.W, X_batch, y_batch, reg)

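For orientation, here is a minimal sketch of what a vectorized multiclass hinge loss with this interface typically computes (W: D x C weights, X: N x D samples, y: N integer labels, scalar reg). The regularization convention used here (reg * sum(W*W) in the loss, 2 * reg * W in the gradient) is an assumption; the implementations exercised above may differ.

import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    # Scores for all samples at once: (N, C).
    num_train = X.shape[0]
    scores = X.dot(W)
    correct_class_scores = scores[np.arange(num_train), y][:, np.newaxis]
    # Hinge margins with delta = 1; the correct class contributes no loss.
    margins = np.maximum(0, scores - correct_class_scores + 1)
    margins[np.arange(num_train), y] = 0
    loss = np.sum(margins) / num_train + reg * np.sum(W * W)
    # Gradient: each positive margin adds X[i] to column j and subtracts it
    # from the correct-class column.
    binary = (margins > 0).astype(float)
    binary[np.arange(num_train), y] = -np.sum(binary, axis=1)
    dW = X.T.dot(binary) / num_train + 2 * reg * W
    return loss, dW
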
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

mean_image = np.mean(X_train, axis=0)
# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

W = np.random.randn(3073, 10) * 0.0001

# Gradient check without regularization; f must use the same reg as the
# analytic gradient for the comparison to be valid.
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.0)
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
print(loss)

# Gradient check again with regularization turned on.
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

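A quick standalone check of the bias trick used in the preprocessing above: appending a column of ones to X lets the last row of W act as the per-class bias, so no separate bias vector is needed. All names below are illustrative.

import numpy as np

X = np.random.randn(5, 3)   # 5 samples, 3 features
W = np.random.randn(3, 10)  # weights for 10 classes
b = np.random.randn(10)     # per-class bias
X_ext = np.hstack([X, np.ones((X.shape[0], 1))])  # bias trick: append ones
W_ext = np.vstack([W, b])                         # fold the bias into the weights
assert np.allclose(X_ext.dot(W_ext), X.dot(W) + b)
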
# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)

print(mean_image[:10])  # print a few of the elements
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))  # visualize the mean image
plt.show()

# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive, svm_loss_vectorized
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss,))

def old_vect_grad_desc():
    ## Old version of gradient descent
    from cs231n.classifiers.linear_svm import svm_loss_naive
    from cs231n.classifiers.linear_svm import svm_loss_vectorized

    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.5, dograd=False)
    loss2, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.5)
    print("loss: ", loss)
    print("loss2: ", loss2)

    # data
    #reg = 10000
    X = X_dev
    y = y_dev
    delta = 1
    loss_sum = 0.0
    loss = 0
    reg = 0.5
    step_size = 1e-5
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    dW2 = np.zeros(W.shape)  # initialize the gradient as zero

    scores = X.dot(W)
    # calculating 'correct_class_scores':
    unit_mat = np.ones([X.shape[0], W.shape[1]])
    c = np.arange(X.shape[0])
    correct_class_score = np.identity(X.shape[0])
    correct_class_score[c, c] = scores[c, y[c]]
    correct_class_score = np.dot(correct_class_score, unit_mat)
    correct_class_score[c, y[c]] = 0
    loss = scores - correct_class_score + unit_mat * delta
    print(loss.shape)
    loss_uni = np.ones(loss.shape)
    loss_uni[loss < 0] = 0
    print(np.sum(loss_uni, axis=1))
    print(loss_uni)

    ## grad for y_i != j
    dW2 = np.dot(np.transpose(X), loss_uni)
    print("dw2:", dW2.shape)
    gr_minus = np.sum(loss > 0, axis=1)
    print("gr_minus", gr_minus)
    zero_mat = np.zeros([X.shape[0], W.shape[1]])
    zero_mat[c, y] = 1
    gr_minus = np.dot(np.identity(gr_minus.shape[0]) * gr_minus, zero_mat)
    print(gr_minus.shape)
    print(gr_minus[0:20, 0:20])
    print("x shape: ", X.shape)
    dW2 -= np.dot(np.transpose(X), gr_minus)
    dW2 /= X.shape[0]
    dW2 += 0.5 * reg * 2 * W
    print("loss matr dimensions: ", loss.shape)
    print(X.shape)

    # Numerical gradient: perturb each weight by +/- step_size and difference
    # the resulting losses.
    for k in range(X.shape[1]):
        for l in range(W.shape[1]):
            X_h = np.zeros([X.shape[0], W.shape[1]])
            X_h[:, l] = X[:, k] * step_size
            unit_mat = np.ones([X.shape[0], W.shape[1]])
            correct_class_score = np.identity(X.shape[0])
            correct_class_score[c, c] = X_h[c, y[c]]
            correct_class_score = np.dot(correct_class_score, unit_mat)
            correct_class_score[c, y[c]] = 0
            loss_h = loss + X_h - correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_ph_sum = np.sum(loss_h) / X.shape[0]
            loss_h = loss - X_h + correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_mh_sum = np.sum(loss_h) / X.shape[0]
            dW[k, l] = loss_ph_sum - loss_mh_sum
    dW = (dW + reg * 2 * W * step_size + step_size**2) / (2 * step_size)
    print(dW.shape)
    print(dW[0, 0:10])
    print("dW2")
    print(dW2.shape)
    print(dW2[0, 0:10])

    loss[loss < 0] = 0
    loss[c, y[c]] = 0
    difference = np.linalg.norm(dW - dW2, ord='fro')
    print("diff ", difference)
    loss_sum = (np.sum(loss) / X.shape[0]) + 0.5 * reg * np.sum(W * W)
    print(loss_sum)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, Y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, Y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# check the gradient of the vectorized svm; recompute the analytic gradient at
# reg=0.0 so it matches the loss function being differenced
loss, grad = svm_loss_vectorized(W, X_dev, Y_dev, 0.0)
f = lambda w: svm_loss_vectorized(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_vectorized(W, X_dev, Y_dev, 5e1)
f = lambda w: svm_loss_vectorized(w, X_dev, Y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, Y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive_test(W, X_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive_test(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive_test(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))
pdb.set_trace()

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
#svm = LinearSVM()
#tic = time.time()
#loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
#                      num_iters=1500, verbose=True)
#toc = time.time()
#print('That took %fs' % (toc - tic))
#pdb.set_trace()

# *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*

# In[ ]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_train, y_train, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In[ ]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.
# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()

def test2():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    num_training = 49000
    num_validation = 1000
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    num_test = 1000
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    # print('Train data shape: ', X_train.shape)
    # print('Train labels shape: ', y_train.shape)
    # print('Validation data shape: ', X_val.shape)
    # print('Validation labels shape: ', y_val.shape)
    # print('Test data shape: ', X_test.shape)
    # print('Test labels shape: ', y_test.shape)

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    # As a sanity check, print out the shapes of the data
    # print('Training data shape: ', X_train.shape)
    # print('Validation data shape: ', X_val.shape)
    # print('Test data shape: ', X_test.shape)

    mean_image = np.mean(X_train, axis=0)
    #print(mean_image[:10])
    #plt.figure(figsize=(4, 4))
    #plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))
    #plt.savefig('./figures/svm_mean.png')
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

    svm_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    #softmax_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    return

    # Unreachable after the early return; kept from the original exploration code.
    W = np.random.randn(10, 3073) * 0.0001
    # loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    # print('loss: %f' % (loss,))
    #loss, grad = svm_loss_naive(W, X_train, y_train, 0.0)
    #loss, grad = svm_loss_vectorized(W, X_train, y_train, 0.0)
    #f = lambda w: svm_loss_naive(w, X_train, y_train, 0.0)[0]
    #grad_check_sparse(f, W, grad, 10)
    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Naive loss: %e, computed in %fs' % (loss_naive, toc - tic))
    tic = time.time()
    loss_vectorized, grad_vector = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Vectorized loss: %e, computed in %fs' % (loss_vectorized, toc - tic))
    # The losses should match but your vectorized implementation should be much faster.
    print('difference: %f' % (loss_naive - loss_vectorized))
    difference = np.linalg.norm(grad_naive - grad_vector, ord='fro')
    print('difference of grad: %f' % difference)

def loss(self, X_batch, y_batch, reg):
    return linear_svm.svm_loss_vectorized(self.W, X_batch, y_batch, reg)

# print(np.mean(xTrain, axis=0).shape)
# print(np.std(xTrain, axis=0).shape)
xTrain = (xTrain - np.mean(xTrain, axis=0)) / np.std(xTrain, axis=0)
# print(np.mean(xTrain, axis=0))
# print(np.std(xTrain, axis=0))
W = 2 * np.random.random_sample((xTrain.shape[1], 10)) - 1
reg = 10

start = time.time()
loss, dw = linear_svm.svm_loss_naive(W, xTrain, yTrain, 0)
print("Time for Naive ", time.time() - start)

start = time.time()
lossVector, dwVector = linear_svm.svm_loss_vectorized(W, xTrain, yTrain, 0)
print("Time for Vectorised approach ", time.time() - start)

# def f(w):
#     return linear_svm.svm_loss_naive(w, xTrain, yTrain, 0)[0]
# gradient_check.grad_check_sparse(f, W, dw, num_checks=10, h=1e-5)

cvFold = 5
learningRates = [1e-3]
regStrengths = [0, 100, 200, 400, 500]
xTrainCV = np.array(np.split(xTrain, cvFold))
yTrainCV = np.array(np.split(yTrain, cvFold))
accuracy = -1
alphaBest = -1

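The snippet above sets up 5-fold cross-validation but cuts off before the search loop. A hedged sketch of how such a loop might continue: the LinearSVM train/predict interface is borrowed from other snippets in this collection, and the selection logic and num_iters value are assumptions, not the original code.

import numpy as np
from cs231n.classifiers import LinearSVM

bestAccuracy = -1
bestParams = None
for lr in learningRates:
    for reg in regStrengths:
        foldAcc = []
        for k in range(cvFold):
            # Fold k is held out; the remaining folds form the training split.
            xFit = np.concatenate([xTrainCV[j] for j in range(cvFold) if j != k])
            yFit = np.concatenate([yTrainCV[j] for j in range(cvFold) if j != k])
            svm = LinearSVM()
            svm.train(xFit, yFit, learning_rate=lr, reg=reg, num_iters=500)
            yPred = svm.predict(xTrainCV[k])
            foldAcc.append(np.mean(yPred == yTrainCV[k]))
        if np.mean(foldAcc) > bestAccuracy:
            bestAccuracy = np.mean(foldAcc)
            bestParams = (lr, reg)
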
# It is possible that once in a while a dimension in the gradcheck will not match
# exactly. What could such a discrepancy be caused by? Is it a reason for concern?
# What is a simple example in one dimension where a gradient check could fail?
# How would changing the margin affect the frequency of this happening?
# *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*

# In[33]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In[34]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.
# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()
_, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)

# It is possible that once in a while a dimension in the gradcheck will not match
# exactly. What could such a discrepancy be caused by? Is it a reason for concern?
# What is a simple example in one dimension where a gradient check could fail?
# *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*

# In[ ]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In[ ]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.
# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()