def test_SVM_random_weights(sample_train, weight_size=0.0001, regularization=1.0):
    Xtrain, ytrain = sample_train()
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(3073, 10) * weight_size
    loss, grad = svm_loss_naive(W, Xtrain, ytrain, regularization)
    assert loss > 8.5
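# With weights this small every class score is roughly zero, so each of the 9
# incorrect classes contributes a margin of about 1 and the expected loss is
# about 9, hence the `loss > 8.5` assertion above. For reference, a minimal
# sketch of the kind of svm_loss_naive these tests exercise (W of shape (D, C),
# X of shape (N, D); the delta margin and the explicit shape check are
# assumptions for illustration, not necessarily the course implementation):
import numpy as np

def svm_loss_naive_sketch(W, X, y, reg, delta=1.0):
    # Shape check: the bias-trick tests expect a ValueError on mismatched shapes.
    if X.shape[1] != W.shape[0]:
        raise ValueError("X and W have incompatible shapes")
    num_train = X.shape[0]
    num_classes = W.shape[1]
    dW = np.zeros_like(W)
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)                      # (C,) class scores
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + delta
            if margin > 0:
                loss += margin
                dW[:, j] += X[i]                  # gradient for the violating class
                dW[:, y[i]] -= X[i]               # gradient for the correct class
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW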
def test_SVM_loss_naive_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # W includes the bias dimension, but the bias column is never appended to
    # Xtrain, so the shapes are incompatible and svm_loss_naive should raise.
    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
def test_SVM_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
def test_SVM_loss_naive_vectorized_comparison(sample_train, train_count):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
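# The comparison tests above check that a vectorized implementation reproduces
# the naive loop within 1e-4. A fully vectorized counterpart, written to match
# the naive sketch earlier in this section; again only a sketch under the same
# shape assumptions, not the assignment's reference solution.
import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg, delta=1.0):
    num_train = X.shape[0]
    scores = X.dot(W)                                        # (N, C)
    correct_class_scores = scores[np.arange(num_train), y]   # (N,)
    margins = np.maximum(0, scores - correct_class_scores[:, None] + delta)
    margins[np.arange(num_train), y] = 0                     # ignore the correct class
    loss = np.sum(margins) / num_train + reg * np.sum(W * W)

    # Gradient: each violated margin adds X[i] to column j and subtracts it
    # from column y[i]; encode those counts in a single (N, C) coefficient matrix.
    binary = (margins > 0).astype(float)
    binary[np.arange(num_train), y] = -np.sum(binary, axis=1)
    dW = X.T.dot(binary) / num_train + 2 * reg * W
    return loss, dW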
def test_SVM_loss_naive_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # using the incorrect W size (no bias dimension)
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    # add the bias dimension to X, so the shapes no longer match
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
def main():
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)

    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01

    start = time.time()
    loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss, ))

    start = time.time()
    loss1, grad = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1, ))

    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
                          num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc :{} Validation :{} Test :{}'.format(acc_train, acc_val, acc_test))
    toc = time.time()
    print('That took %fs' % (toc - tic))
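# gen_train_val_test and evaluation are helpers defined elsewhere in this script
# and not shown here. A plausible sketch of evaluation (hypothetical, assuming
# only that the classifier exposes the predict(X) method used later in this
# section):
def evaluation(classifier, X, y):
    # Accuracy of a trained linear classifier: fraction of correctly predicted labels.
    y_pred = classifier.predict(X)
    return np.mean(y_pred == y)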
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
from cs231n.classifiers.linear_svm import svm_loss_vectorized
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive`
# which uses for loops to evaluate the multiclass SVM loss function.

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the
# gradient for the SVM cost function and implement it inline inside the function
# `svm_loss_naive`. You will find it helpful to interleave your new code inside
# the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate to
# the gradient that you computed. We have provided code that does this for you:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
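# For reference, the gradient the notebook asks you to derive (standard result).
# With class scores s = f(x_i; W) and w_j the weight vector of class j, the
# per-example hinge loss is
#     L_i = sum_{j != y_i} max(0, s_j - s_{y_i} + Delta)
# and its gradient is
#     dL_i/dw_j     = 1[s_j - s_{y_i} + Delta > 0] * x_i                       (j != y_i)
#     dL_i/dw_{y_i} = -(sum_{j != y_i} 1[s_j - s_{y_i} + Delta > 0]) * x_i
# The regularization term reg * sum(W * W) adds 2 * reg * W to the gradient,
# which is averaged over the N training examples.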
def f(w):
    return svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
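# The numeric check used throughout this section compares the analytic gradient
# against a centered finite difference at a few random coordinates. A sketch in
# the spirit of cs231n's grad_check_sparse (the real helper lives in
# cs231n/gradient_check.py; only the (f, x, analytic_grad, num_checks, h) call
# signature seen in this section is assumed here):
import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        # pick a random coordinate of x
        ix = tuple(np.random.randint(dim) for dim in x.shape)
        old_val = x[ix]
        x[ix] = old_val + h
        fxph = f(x)                      # f(x + h)
        x[ix] = old_val - h
        fxmh = f(x)                      # f(x - h)
        x[ix] = old_val                  # restore the original value
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / \
            (abs(grad_numerical) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))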
xTest = xTest[:lengthTest]
yTest = yTest[:lengthTest]

# print(xTrain.shape)
# print(np.mean(xTrain, axis=0).shape)
# print(np.std(xTrain, axis=0).shape)
xTrain = (xTrain - np.mean(xTrain, axis=0)) / np.std(xTrain, axis=0)
# print(np.mean(xTrain, axis=0))
# print(np.std(xTrain, axis=0))

W = 2 * np.random.random_sample((xTrain.shape[1], 10)) - 1
reg = 10

start = time.time()
loss, dw = linear_svm.svm_loss_naive(W, xTrain, yTrain, 0)
print("Time for Naive ", time.time() - start)

start = time.time()
lossVector, dwVector = linear_svm.svm_loss_vectorized(W, xTrain, yTrain, 0)
print("Time for Vectorised approach ", time.time() - start)

# def f(w):
#     return linear_svm.svm_loss_naive(w, xTrain, yTrain, 0)[0]
# gradient_check.grad_check_sparse(f, W, dw, num_checks=10, h=1e-5)

cvFold = 5
learningRates = [1e-3]
regStrengths = [0, 100, 200, 400, 500]
xTrainCV = np.array(np.split(xTrain, cvFold))
# only has to worry about optimizing a single weight matrix W.
# Append a bias column of ones to each data matrix; the second dimension becomes 3073.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
# dW = diff.dot(X.T)
correct = num_sum * y_index
print(correct[:, :2])
diff -= correct
# print(diff)
dW = diff.dot(X.T) / float(num_train)
# print(dW)

loss1, grad = svm_loss_naive(W, X, y, 1)
# print(grad)
loss2, grad2 = svm_loss_vectorized(W, X, y, 1)
# print(grad2)

print(loss)
print(loss1)
print(loss2)

differ = np.linalg.norm(grad - grad2, ord='fro')
print('difference: %f' % differ)
differ1 = np.linalg.norm(dW - grad, ord='fro')
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses
# for loops to evaluate the multiclass SVM loss function.

# In[9]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(10, 3073) * 0.0001

loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the
# gradient for the SVM cost function and implement it inline inside the function
# `svm_loss_naive`. You will find it helpful to interleave your new code inside
# the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate to
# the gradient that you computed. We have provided code that does this for you:

# In[12]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
#loss, grad = svm_loss_naive(W, X_train, y_train, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
X_train = np.hstack([X_train, np.ones(
    (X_train.shape[0], 1))])  # hstack((a, b)) appends b after a
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
# print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(
    3073, 10) * 0.0001  # np.random.randn(3073, 10) draws a (3073, 10) array from N(0, 1)

loss, grad = svm_loss_naive(
    W, X_dev, y_dev, 0.000005)  # 0.000005 ???
print('loss: %f' % (loss, ))  # output: loss: 9.548658

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

'''
# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
def old_vect_grad_desc():
    ## Old version of gradient descent
    from cs231n.classifiers.linear_svm import svm_loss_naive
    from cs231n.classifiers.linear_svm import svm_loss_vectorized

    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.5, dograd=False)
    loss2, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.5)
    print("loss: ", loss)
    print("loss2: ", loss2)

    # data
    #reg = 10000
    X = X_dev
    y = y_dev
    delta = 1
    loss_sum = 0.0
    loss = 0
    reg = 0.5
    step_size = 1e-5
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    dW2 = np.zeros(W.shape)  # initialize the gradient as zero

    scores = X.dot(W)

    # calculating 'correct_class_scores':
    unit_mat = np.ones([X.shape[0], W.shape[1]])
    c = range(0, X.shape[0])
    correct_class_score = np.identity(X.shape[0])
    correct_class_score[c, c] = scores[c, y[c]]
    correct_class_score = np.dot(correct_class_score, unit_mat)
    correct_class_score[c, y[c]] = 0

    loss = scores - correct_class_score + unit_mat * delta
    print(loss.shape)
    loss_uni = np.ones(loss.shape)
    loss_uni[loss < 0] = 0
    print(np.sum(loss_uni, axis=1))
    print(loss_uni)

    ## grad for y_i <> j
    dW2 = np.dot(np.transpose(X), loss_uni)
    print("dw2:", dW2.shape)
    gr_minus = np.sum(loss > 0, axis=1)
    print("gr_minus", gr_minus)
    zero_mat = np.zeros([X.shape[0], W.shape[1]])
    zero_mat[c, y] = 1
    gr_minus = np.dot(np.identity(gr_minus.shape[0]) * gr_minus, zero_mat)
    print(gr_minus.shape)
    print(gr_minus[0:20, 0:20])
    print("x shape: ", X.shape)
    dW2 -= np.dot(np.transpose(X), gr_minus)
    dW2 /= X.shape[0]
    dW2 += 0.5 * reg * 2 * W

    print("loss matr dimensions: ", loss.shape)
    print(X.shape)
    k = 1
    for k in range(X.shape[1]):
        for l in range(W.shape[1]):
            X_h = np.zeros([X.shape[0], W.shape[1]])
            X_h[:, l] = X[:, k] * step_size
            c = range(0, X.shape[0])
            unit_mat = np.ones([X.shape[0], W.shape[1]])
            correct_class_score = np.identity(X.shape[0])
            correct_class_score[c, c] = X_h[c, y[c]]
            correct_class_score = np.dot(correct_class_score, unit_mat)
            correct_class_score[c, y[c]] = 0

            loss_h = loss + X_h - correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_ph_sum = (np.sum(loss_h) / X.shape[0])

            loss_h = loss - X_h + correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_mh_sum = (np.sum(loss_h) / X.shape[0])

            dW[k, l] = loss_ph_sum - loss_mh_sum

    dW = (dW + reg * 2 * W * step_size + step_size**2) / (2 * step_size)
    print(dW.shape)
    print(dW[0, 0:10])
    print("dW2")
    print(dW2.shape)
    print(dW2[0, 0:10])

    loss[loss < 0] = 0
    loss[c, y[c]] = 0
    difference = np.linalg.norm(dW - dW2, ord='fro')
    print("diff ", difference)
    loss_sum = (np.sum(loss) / X.shape[0]) + 0.5 * reg * np.sum(W * W)
    print(loss_sum)
def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
          batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label 0 <= c < C for C classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training iteration.
    """
    num_train, dim = X.shape
    # assume y takes values 0...K-1 where K is number of classes
    num_classes = np.max(y) + 1
    if self.W is None:
        # lazily initialize W
        self.W = 0.001 * np.random.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):
        X_batch = None
        y_batch = None

        #########################################################################
        # TODO:                                                                 #
        # Sample batch_size elements from the training data and their           #
        # corresponding labels to use in this round of gradient descent.        #
        # Store the data in X_batch and their corresponding labels in           #
        # y_batch; after sampling X_batch should have shape (batch_size, dim)   #
        # and y_batch should have shape (batch_size,)                           #
        #                                                                       #
        # Hint: Use np.random.choice to generate indices. Sampling with         #
        # replacement is faster than sampling without replacement.              #
        #########################################################################
        indexes = np.arange(num_train)
        np.random.shuffle(indexes)
        indexes = indexes[:batch_size]
        X_batch = X[indexes]  # sample batch_size examples
        y_batch = y[indexes]

        l, dW = svm_loss_naive(self.W, X_batch, y_batch, reg)
        #from cs231n.gradient_check import grad_check_sparse
        #loss1, grad1 = svm_loss_naive(self.W, X_batch, y_batch, 5e1)
        #f = lambda w: svm_loss_naive(w, X_batch, y_batch, 5e1)[0]
        #grad_numerical = grad_check_sparse(f, self.W, grad1)
        if it % 100 == 0 and verbose:
            print("%d : %f" % (it, l))
        loss_history.append(l)

        #########################################################################
        # TODO:                                                                 #
        # Update the weights using the gradient and the learning rate.          #
        #########################################################################
        self.W += -learning_rate * dW  # perform parameter update
        #l, dW = svm_loss_vectorized(self.W, X_batch, y_batch, reg)
        #########################################################################
        #                          END OF YOUR CODE                            #
        #########################################################################

    return loss_history
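# LinearSVM.predict is called later in this section but is not shown. A minimal
# sketch of that method, consistent with the (dim, num_classes) weight layout
# used in train above (an assumption, not the assignment's reference code;
# numpy is assumed imported as np):
def predict(self, X):
    """Return the predicted class label for each row of X (shape (N, D))."""
    # Scores are X.dot(W) with W of shape (D, C); pick the highest-scoring class.
    scores = X.dot(self.W)
    return np.argmax(scores, axis=1)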
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses
# for loops to evaluate the multiclass SVM loss function.

# In[ ]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the
# gradient for the SVM cost function and implement it inline inside the function
# `svm_loss_naive`. You will find it helpful to interleave your new code inside
# the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate to
# the gradient that you computed. We have provided code that does this for you:

# In[ ]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# Append bias dimension of ones (bias trick) so SVM only needs to optimize
# a single weight matrix W
X_train = np.hstack([X_train, np.ones(
    (X_train.shape[0], 1))])  # second dim becomes 3072+1=3073
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

''' Evaluate naive implementation of loss '''
# Generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001  # 3073x10 (10 classes)
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0)

# # Numerically compute the gradient along several randomly chosen dimensions
# # and compare with analytically computed gradient (grad)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]  # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
# # Again with the regularization turned on
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]  # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)

''' Evaluate vectorized implementation of loss '''
loss_v, grad_v = svm_loss_vectorized(W, X_dev, y_dev, 0)
print("Gradient difference", np.linalg.norm(grad - grad_v))
print("Loss difference", loss - loss_v)

''' Implement Stochastic Gradient Descent to minimize loss '''
svm = LinearSVM()
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
#print(X_train[0])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
#loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
#f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
#grad_numerical = grad_check_sparse(f, W, grad)
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

#%%
# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

#%%
# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]  # lambda creates a throw-away function
def test2():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    num_training = 49000
    num_validation = 1000
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    num_test = 1000
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # print('Train data shape: ', X_train.shape)
    # print('Train labels shape: ', y_train.shape)
    # print('Validation data shape: ', X_val.shape)
    # print('Validation labels shape: ', y_val.shape)
    # print('Test data shape: ', X_test.shape)
    # print('Test labels shape: ', y_test.shape)

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # As a sanity check, print out the shapes of the data
    # print('Training data shape: ', X_train.shape)
    # print('Validation data shape: ', X_val.shape)
    # print('Test data shape: ', X_test.shape)

    mean_image = np.mean(X_train, axis=0)
    #print(mean_image[:10])
    #plt.figure(figsize=(4, 4))
    #plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))
    #plt.savefig('./figures/svm_mean.png')
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

    svm_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    #softmax_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    return

    # Unreachable because of the return above; kept for reference.
    W = np.random.randn(10, 3073) * 0.0001
    # loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    # print('loss: %f' % (loss,))
    #loss, grad = svm_loss_naive(W, X_train, y_train, 0.0)
    #loss, grad = svm_loss_vectorized(W, X_train, y_train, 0.0)
    #f = lambda w: svm_loss_naive(w, X_train, y_train, 0.0)[0]
    #grad_check_sparse(f, W, grad, 10)

    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Naive loss: %e, computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vector = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Vectorized loss: %e, computed in %fs' % (loss_vectorized, toc - tic))

    # The losses should match but your vectorized implementation should be much faster.
    print('difference: %f' % (loss_naive - loss_vectorized))
    difference = np.linalg.norm(grad_naive - grad_vector, ord='fro')
    print('difference of grad: %f' % difference)
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses
# for loops to evaluate the multiclass SVM loss function.

# In[ ]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the
# gradient for the SVM cost function and implement it inline inside the function
# `svm_loss_naive`. You will find it helpful to interleave your new code inside
# the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate to
# the gradient that you computed. We have provided code that does this for you:

# In[ ]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

#%%
# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses
# for loops to evaluate the multiclass SVM loss function.

# In[31]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the
# gradient for the SVM cost function and implement it inline inside the function
# `svm_loss_naive`. You will find it helpful to interleave your new code inside
# the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate to
# the gradient that you computed. We have provided code that does this for you:

# In[32]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
def SVM(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    loss, grad = svm_loss_naive(W, train_data, train_label, 0.000005)
    print('loss: %f \n' % loss)

    '''
    f = lambda w: svm_loss_naive(w, train_data, train_label, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)
    loss, grad = svm_loss_naive(W, train_data, train_label, 5e1)
    f = lambda w: svm_loss_naive(w, train_data, train_label, 5e1)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    t1 = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print('\nNaive Loss: %e computed in %fs' % (loss_naive, t2 - t1))

    t1 = time.time()
    loss_vectorized, grad_vectorized = svm_loss_vectorized(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print('Vectorised loss and gradient: %e computed in %fs\n' % (loss_vectorized, t2 - t1))

    difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('difference: %f' % difference)
    '''

    from cs231n.classifiers import LinearSVM
    svm = LinearSVM()
    t1 = time.time()
    loss_hist = svm.train(train_data, train_label, learning_rate=1e-7, reg=5e4,
                          num_iters=1000, verbose=True)
    t2 = time.time()
    print('That took %fs' % (t2 - t1))

    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    train_label_predict = svm.predict(train_data)
    print('Training accuracy: %f' % np.mean(train_label == train_label_predict))
    validation_label_predict = svm.predict(validation_data)
    print('Validation accuracy: %f' % np.mean(validation_label == validation_label_predict))

    learning_rates = [1e-7, 2e-7, 5e-7, 1e-6]
    regularization_strengths = [1e4, 2e4, 5e4, 1e5, 5e5, 1e6]
    results = {}
    best_val = -1
    best_svm = None
    for learning in learning_rates:
        for regularization in regularization_strengths:
            svm = LinearSVM()
            svm.train(train_data, train_label, learning_rate=learning,
                      reg=regularization, num_iters=2000)
            train_label_predict = svm.predict(train_data)
            train_accuracy = np.mean(train_label_predict == train_label)
            print('Training accuracy: %f' % train_accuracy)
            validation_label_predict = svm.predict(validation_data)
            val_accuracy = np.mean(validation_label_predict == validation_label)
            print('Validation accuracy: %f' % val_accuracy)
            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm
            results[(learning, regularization)] = (train_accuracy, val_accuracy)

    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy %f' %
              (lr, reg, train_accuracy, val_accuracy))
    print('Best validation accuracy achieved during cross validation: %f' % best_val)

    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]

    sz = [results[x][0] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 training accuracy')
    plt.show()

    sz = [results[x][1] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 validation accuracy')
    plt.show()

    y_test_pred = best_svm.predict(test_data)
    test_accuracy = np.mean(y_test_pred == test_label)
    print('Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

    print(best_svm.W.shape)
    w = best_svm.W[:, :]
    print(w.shape)
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        wimg = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()