def compute_at(hyper_params):
    """Train one LinearSVM for a hyper-parameter pair and track the best run.

    ``hyper_params`` unpacks into ``(learning_rate, regularizer)``.  A run is
    scored by the smaller of its training and validation accuracy.  The
    module-level ``state`` object is updated in place: ``epoch`` is always
    incremented, and ``accuracy``/``svm``/``hyper`` are overwritten when the
    score beats the current ``state.accuracy``.

    Returns ``(improved, final_accuracy)``.
    """
    learning_rate, regularizer = hyper_params

    svm = LinearSVM()
    svm.train(X_train, y_train, learning_rate=learning_rate,
              reg=regularizer, num_iters=1000)

    train_accuracy = np.mean(y_train == svm.predict(X_train))
    val_accuracy = np.mean(y_val == svm.predict(X_val))
    # Score by the weaker of the two accuracies.
    final_accuracy = min(train_accuracy, val_accuracy)

    state.epoch += 1
    improved = state.accuracy < final_accuracy
    if improved:
        state.accuracy = final_accuracy
        state.svm = svm
        state.hyper = hyper_params[:]  # store a copy, not the caller's object

    marker = "(!)" if improved else ""
    print("Epoch %2d: (%.8f, %f) -> %f %s" % (
        state.epoch, learning_rate, regularizer, final_accuracy, marker))
    return improved, final_accuracy
def train(X_train, y_train, X_val, y_val):
    """Grid-search learning rate and regularization strength for a LinearSVM.

    Every (learning rate, regularization strength) combination is trained for
    1000 iterations on the training split and scored on both splits.  A
    summary line per combination and the best validation accuracy are printed.

    Args:
        X_train, y_train: training data and labels passed to ``LinearSVM.train``.
        X_val, y_val: validation data and labels used for model selection.

    Returns:
        The LinearSVM instance with the highest validation accuracy.
    """
    # You should experiment with different ranges for the learning rates and
    # regularization strengths; if you are careful you should be able to get a
    # classification accuracy of about 0.4 on the validation set.
    learning_rates = [1e-7, 5e-6, 1e-6]
    regularization_strengths = [1e4, 5e4, 1e5]

    # results maps (learning_rate, regularization_strength) to
    # (training_accuracy, validation_accuracy); accuracy is the fraction of
    # data points that are correctly classified.
    results = {}
    best_val = -1    # The highest validation accuracy that we have seen so far.
    best_svm = None  # The LinearSVM object that achieved the highest validation rate.

    for lr in learning_rates:
        for reg in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=reg,
                      num_iters=1000)
            train_accuracy = np.mean(y_train == svm.predict(X_train))
            val_accuracy = np.mean(y_val == svm.predict(X_val))
            results[(lr, reg)] = (train_accuracy, val_accuracy)
            if best_val < val_accuracy:
                best_val = val_accuracy
                best_svm = svm

    # Print out results.  Scientific notation is used for the hyperparameters:
    # with %f a learning rate of 1e-7 is rendered as 0.000000.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('Learning-rate=%e regularizer=%e train-accuracy=%f validation-accuracy=%f' % (
            lr, reg, train_accuracy, val_accuracy))
    print('Best validation accuracy achieved during cross-validation: %f' % best_val)
    return best_svm
def main():
    """Time the naive and vectorized SVM losses, then train and evaluate a LinearSVM.

    Data comes from ``gen_train_val_test(49000, 1000, 1000)``.  Timings, both
    loss values and the train/validation/test accuracies are printed.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)

    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01

    # time.time() is used throughout: time.clock() was removed in Python 3.8
    # and the rest of this function already measures with time.time().
    start = time.time()
    loss, _ = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss, ))

    start = time.time()
    loss1, _ = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1, ))

    svm = LinearSVM()
    tic = time.time()
    svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
              num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc :{} Validation :{} Test :{}'.format(acc_train, acc_val, acc_test))
    # NOTE: the reported duration covers training plus the three evaluations.
    toc = time.time()
    print('That took %fs' % (toc - tic))
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
#plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[ ]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
#
#loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.0)
#
#from cs231n.classifiers.linear_svm import svm_loss_vectorized
#
#loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 5e1)
#
#difference = np.linalg.norm(grad - grad_vectorized, ord='fro')
#print('difference: %f' % difference)

from cs231n.classifiers import LinearSVM

# Train a linear SVM with SGD and time the run.
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=3e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Loss as a function of the iteration number.
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Accuracy on the training and validation splits.
y_train_pred = svm.predict(X_train)
train_hits = y_train == y_train_pred
print('training accuracy: %f' % (np.mean(train_hits), ))
y_val_pred = svm.predict(X_val)
val_hits = y_val == y_val_pred
print('validation accuracy: %f' % (np.mean(val_hits), ))
# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)


# In[62]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[63]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[69]:
# we use the Frobenius norm to compare them.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical
# gradient. We are therefore ready to do SGD to minimize the loss.

# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-6, reg=1e5, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[ ]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    # Each entry of learning_rates is indexed as [0] = step size and
    # [1] = number of SGD iterations to run with that step size.
    step_size = learning_rate[0]
    iterations = learning_rate[1]
    for reg in regularization_strengths:
        print("LR",learning_rate,"reg",reg)
        svm = LinearSVM()
        loss_hist = svm.train(X_train, y_train, learning_rate=step_size, reg=reg,
                              num_iters=iterations, verbose=True)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        # Compute each accuracy once and reuse it below.
        train_acc = np.mean(y_train == y_train_pred)
        val_acc = np.mean(y_val == y_val_pred)
        results[(step_size, reg)] = (train_acc, val_acc)
        if best_val < val_acc:
            best_val = val_acc
            # Keep the winning model as well as its hyperparameters; the task
            # description above requires best_svm to hold it.
            best_svm = svm
            best_parameters = { 'LR':step_size, 'reg': reg}
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
iters = 2000 #100
for lr in learning_rates:
    for rs in regularization_strengths:
        # Fit a fresh model for this (learning rate, regularization) pair.
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

        # Fraction of correctly classified points on each split.
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        acc_train = np.mean(y_train == y_train_pred)
        acc_val = np.mean(y_val == y_val_pred)

        results[(lr, rs)] = acc_train, acc_val

        # Remember the model with the strictly highest validation accuracy.
        if acc_val > best_val:
            best_val, best_svm = acc_val, svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
from cs231n.classifiers import LinearSVM

# results maps (learning_rate, regularization_strength) to
# (training_accuracy, validation_accuracy), where accuracy is the fraction of
# data points that are correctly classified.
results = {}
best_val = -1    # The highest validation accuracy that we have seen so far.
best_svm = None  # The LinearSVM object that achieved the highest validation rate.

for lr in learning_rates:
    for reg in regularization_strengths:
        svm = LinearSVM()
        loss_hist = svm.train(X_train, y_train, learning_rate=lr, reg=reg,
                              num_iters=1500, verbose=False)

        y_train_pred = svm.predict(X_train)
        tr_acc = np.mean(y_train == y_train_pred)

        y_val_pred = svm.predict(X_val)
        val_acc = np.mean(y_val == y_val_pred)

        report = (lr, reg, tr_acc, val_acc)
        print('lr: %f, reg: %f, training accuracy: %f, validation accuracy: %f' % report)

        results[(lr, reg)] = (tr_acc, val_acc)
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
num_iter = 500
# Register every hyperparameter combination first so the search below visits
# them in sorted order.
for i in learning_rates:
    for j in regularization_strengths:
        results[(i, j)]=0
for lr, reg in sorted(results):
    svm = LinearSVM()
    # Keyword arguments are used so that the trailing flag is bound to
    # `verbose` and cannot land on another positional parameter of train()
    # (presumably batch_size in the stock CS231n signature -- confirm).
    svm.train(X_train, y_train, learning_rate=lr, reg=reg,
              num_iters=num_iter, verbose=True)
    # Model selection must be scored on the validation split; scoring on the
    # test split would leak test data into the choice of hyperparameters.
    y_pred = svm.predict(X_val)
    val_accuracy = np.mean(y_pred == y_val)
    y_pred = svm.predict(X_train)
    train_accuracy = np.mean(y_pred == y_train)
    results[(lr, reg)] = [train_accuracy, val_accuracy]
    if best_val < val_accuracy:
        best_val = val_accuracy
        best_svm = svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for lr in learning_rates:
    for reg in regularization_strengths:
        # Fit a fresh model for this hyperparameter pair.
        svm = LinearSVM()
        svm.train(X_train, y_train, lr, reg, num_iters=10)

        # Fraction of correctly classified points on each split.
        y_train_pred = svm.predict(X_train)
        tr_acc = np.mean(y_train_pred == y_train)
        y_val_pred = svm.predict(X_val)
        val_acc = np.mean(y_val_pred == y_val)

        keys, values = (lr, reg), (tr_acc, val_acc)
        results[keys] = values

        # Keep the model with the strictly highest validation accuracy.
        if best_val < val_acc:
            best_val, best_svm = val_acc, svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
# The timestamps must be taken around the training call: with them disabled,
# the message below reported the duration of the vectorized-loss measurement
# above instead of the training time.
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=500, verbose=True)  #should be 1500 iterations
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
# plt.plot(loss_hist)
# plt.xlabel('Iteration number')
# plt.ylabel('Loss value')
# plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('difference: %f' % difference)

# %% [markdown]
# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss. Your code for this part will be written inside `cs231n/classifiers/linear_classifier.py`.

# %%
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# %%
# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# %%
# Write the LinearSVM.predict function and evaluate the performance on both the
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for lr_use in learning_rates:
    for reg_use in regularization_strengths:
        svm = LinearSVM()
        # Train and score on the training split, as the task description asks.
        # Previously the model was fitted and evaluated on X_dev while the
        # predictions were compared against y_train, so the labels did not
        # belong to the predicted samples.
        loss_hist = svm.train(X_train, y_train, learning_rate=lr_use,
                              reg=reg_use, num_iters=1500, verbose=True)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        acc_train = np.mean(y_train == y_train_pred)
        acc_val = np.mean(y_val == y_val_pred)
        if acc_val > best_val:
            best_lr = lr_use
            best_reg = reg_use
            best_val = acc_val
            best_svm = svm
        results[(lr_use, reg_use)] = (acc_train, acc_val)
# (The leftover pdb.set_trace() breakpoint was removed so the script runs
# unattended.)
################################################################################
#                              END OF YOUR CODE                                #
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[ ]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
iters = 2000  # 100
for lr in learning_rates:
    for rs in regularization_strengths:
        # Fit a fresh model for this (learning rate, regularization) pair.
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

        # Fraction of correctly classified points on each split.
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        acc_train = np.mean(y_train == y_train_pred)
        acc_val = np.mean(y_val == y_val_pred)

        results[(lr, rs)] = acc_train, acc_val

        # Remember the model with the strictly highest validation accuracy.
        if acc_val > best_val:
            best_val, best_svm = acc_val, svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
loss_gap = loss_naive - loss_vectorized
print('difference: %f' % (loss_gap))

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)

# Check how gradient descent performs.
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Plot the loss curve.
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
# plt.plot(loss_hist)
# plt.xlabel('Iteration number')
# plt.ylabel('Loss value')
# plt.show()

# Predict on the training set and on the validation set.
y_train_pred = svm.predict(X_train)
train_hits = y_train == y_train_pred
print('training accuracy: %f' % (np.mean(train_hits), ))
y_val_pred = svm.predict(X_val)
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
tic = time.time()
# Sample 20 evenly spaced values in [first, second) for each hyperparameter.
# np.linspace with endpoint=False is used instead of np.arange with a
# fractional step: with a float step the number of elements produced by
# arange depends on rounding and can be off by one.
grid_points = 20
lr_grid = np.linspace(learning_rates[0], learning_rates[1],
                      grid_points, endpoint=False)
reg_grid = np.linspace(regularization_strengths[0], regularization_strengths[1],
                       grid_points, endpoint=False)
for ilr in lr_grid:
    for ireg in reg_grid:
        svm = LinearSVM()
        _ = svm.train(X_train, y_train, learning_rate=ilr, reg=ireg,
                      num_iters=1500, verbose=True)
        y_train_pred = svm.predict(X_train)
        train_acc = np.mean(y_train == y_train_pred)
        print('training accuracy: %f' % (train_acc))
        y_val_pred = svm.predict(X_val)
        val_acc = np.mean(y_val == y_val_pred)
        # Keep the model with the strictly highest validation accuracy.
        if val_acc > best_val:
            best_val = val_acc
            best_svm = svm
        print('validation accuracy: %f' % (val_acc))
        results[(ilr, ireg)] = (train_acc, val_acc)
################################################################################
#                              END OF YOUR CODE                                #
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        # Fit a fresh model for this hyperparameter pair.
        svm = LinearSVM()
        loss_hist = svm.train(X_train, y_train, learning_rate,
                              regularization_strength,
                              num_iters=1500, verbose=True)

        # Fraction of correctly classified points on each split.
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        train_accuracy = np.mean(y_train_pred == y_train)
        val_accuracy = np.mean(y_val == y_val_pred)

        results[(learning_rate, regularization_strength)] = (train_accuracy, val_accuracy)

        # Keep the model with the strictly highest validation accuracy.
        if best_val < val_accuracy:
            best_val, best_svm = val_accuracy, svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[ ]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)

# Stochastic Gradient Descent

# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
train_hits = y_train == y_train_pred
print('training accuracy: %f' % (np.mean(train_hits), ))
y_val_pred = svm.predict(X_val)
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the implementations are compared through the
# Frobenius norm of their difference.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
grad_gap = grad_naive - grad_vectorized
difference = np.linalg.norm(grad_gap, ord='fro')
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical
# gradient. We are therefore ready to do SGD to minimize the loss.

# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-6, reg=1e5, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[ ]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for reg in regularization_strengths:
        # Fit a fresh model for this hyperparameter pair.
        svm = LinearSVM()
        sgd_options = dict(learning_rate=learning_rate, reg=reg,
                           num_iters=2500, verbose=True)
        loss_hist = svm.train(X_train, y_train, **sgd_options)

        # training and validation set
        y_train_pred = svm.predict(X_train)
        training_accuracy = np.mean(y_train == y_train_pred)
        print('training accuracy: %f' % (training_accuracy, ))

        y_val_pred = svm.predict(X_val)
        validation_accuracy = np.mean(y_val == y_val_pred)
        print('validation accuracy: %f' % (validation_accuracy, ))

        results[(learning_rate, reg)] = (training_accuracy, validation_accuracy)

        # Keep the model with the strictly highest validation accuracy.
        if best_val < validation_accuracy:
            best_val, best_svm = validation_accuracy, svm
################################################################################
print('difference: %f' % difference)


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[37]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
sgd_options = dict(learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_options)
toc = time.time()
print('That took %fs' % (toc - tic))


# In[38]:

# Plotting the loss against the iteration number is a useful debugging
# strategy:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
#plt.show()


# In[39]:
def SVM(train_data, train_label, validation_data, validation_label, test_data, test_label):
    """Run the linear-SVM experiment end to end.

    Steps: evaluate the naive loss on random weights, train and score one
    baseline LinearSVM, grid-search learning rate and regularization strength
    on the validation split, plot both accuracy grids, report the test
    accuracy of the best model and display its weights as one image per class.
    Everything is reported via print and matplotlib; nothing is returned.
    """
    W = np.random.randn(10, 3072) * 0.0001
    loss, grad = svm_loss_naive(W, train_data, train_label, 0.000005)
    print('loss: %f \n' % loss)

    # Disabled gradient-check / timing code kept for reference:
    # f=lambda w: svm_loss_naive(w, train_data,train_label,0.0)[0]
    # grad_numerical=grad_check_sparse(f,W,grad,10)
    # loss, grad = svm_loss_naive(W,train_data,train_label,5e1)
    # f=lambda w:svm_loss_naive(w,train_data,train_label,5e1)[0]
    # grad_numerical=grad_check_sparse(f,W,grad,10)
    # t1 = time.time()
    # loss_naive, grad_naive = svm_loss_naive(W, train_data, train_label, 0.000005)
    # t2 = time.time()
    # print '\nNaive Loss: %e computed in %fs'%(loss_naive, t2-t1)
    # t1 = time.time()
    # loss_vectorized,grad_vectorized = svm_loss_vectorized(W, train_data, train_label, 0.000005)
    # t2 = time.time()
    # print 'Vectorised loss and gradient: %e computed in %fs\n'%(loss_vectorized, t2-t1)
    # difference = np.linalg.norm(grad_naive-grad_vectorized, ord='fro')
    # print 'difference: %f'%difference

    from cs231n.classifiers import LinearSVM

    # --- Baseline model -----------------------------------------------------
    svm = LinearSVM()
    t1 = time.time()
    loss_hist = svm.train(train_data, train_label, learning_rate=1e-7,
                          reg=5e4, num_iters=1000, verbose=True)
    t2 = time.time()
    print('That took %fs' % (t2 - t1))

    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    train_label_predict = svm.predict(train_data)
    print('Training accuracy: %f' % np.mean(train_label == train_label_predict))
    validation_label_predict = svm.predict(validation_data)
    print('Validation accuracy: %f' % np.mean(validation_label == validation_label_predict))

    # --- Hyperparameter grid search -------------------------------------------
    learning_rates = [1e-7, 2e-7, 5e-7, 1e-6]
    regularization_strengths = [1e4, 2e4, 5e4, 1e5, 5e5, 1e6]
    results = {}
    best_val = -1
    best_svm = None
    for learning in learning_rates:
        for regularization in regularization_strengths:
            svm = LinearSVM()
            svm.train(train_data, train_label, learning_rate=learning,
                      reg=regularization, num_iters=2000)

            train_label_predict = svm.predict(train_data)
            train_accuracy = np.mean(train_label_predict == train_label)
            print('Training accuracy: %f' % train_accuracy)

            validation_label_predict = svm.predict(validation_data)
            val_accuracy = np.mean(validation_label_predict == validation_label)
            print('Validation accuracy: %f' % val_accuracy)

            if best_val < val_accuracy:
                best_val, best_svm = val_accuracy, svm
            results[(learning, regularization)] = (train_accuracy, val_accuracy)

    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy %f' % (
            lr, reg, train_accuracy, val_accuracy))
    print('Best validation accuracy achieved during cross validation: %f ' % best_val)

    # --- Accuracy scatter plots (marker size encodes accuracy) ----------------
    x_scatter = [math.log10(key[0]) for key in results]
    y_scatter = [math.log10(key[1]) for key in results]
    plot_specs = ((0, 'Cifar-10 training accuracy'),
                  (1, 'Cifar-10 validation accuracy'))
    for column, title in plot_specs:
        sz = [results[key][column] * 1500 for key in results]
        plt.subplot(1, 1, 1)
        plt.scatter(x_scatter, y_scatter, sz)
        plt.xlabel('log learning rate')
        plt.ylabel('log regularization strength')
        plt.title(title)
        plt.show()

    # --- Test-set evaluation of the selected model ----------------------------
    y_test_pred = best_svm.predict(test_data)
    test_accuracy = np.mean(y_test_pred == test_label)
    print('Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

    # --- Visualize the learned weights, one image per class -------------------
    print(best_svm.W.shape)
    w = best_svm.W[:, :]
    print(w.shape)
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    for i, class_name in enumerate(classes):
        plt.subplot(2, 5, i + 1)
        # Rescale the weights to the 0..255 range so they can be shown as an image.
        wimg = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(class_name)
    plt.show()