def getQoS(self):
    """Return the test-set accuracy, in percent, of the saved linear SVM.

    Loads the dataset via ``load_CIFAR10(self.data_path)``, flattens every
    image to a row vector, subtracts the training-set mean image from the
    test images and appends a constant column of ones. The weights are read
    from ``self.run_dir + "model_svm.p"`` and used to predict the test labels.

    Returns:
        The fraction of correctly predicted test labels multiplied by 100.0,
        or 0.0 when the model cannot be loaded or evaluated.
    """
    X_train, _, X_test, y_test = load_CIFAR10(self.data_path)
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_train = np.reshape(X_train, (X_train.shape[0], -1))

    # Centre the test data with the *training* mean, then add the ones column.
    mean_image = np.mean(X_train, axis=0)
    X_test -= mean_image
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

    svm = LinearSVM()
    try:
        # NOTE(security): pickle.load can execute arbitrary code; the model
        # file must come from a trusted run directory.
        with open(self.run_dir + "model_svm.p", "rb") as model_file:
            svm.W = pickle.load(model_file, encoding='latin1')
        y_test_pred = svm.predict(X_test)
        test_accuracy = np.mean(y_test == y_test_pred)
    except Exception:
        # Best effort: a missing or unusable model scores zero. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        test_accuracy = 0.0
    return test_accuracy * 100.0
def compute_at(hyper_params):
    """Train a LinearSVM for one hyper-parameter pair and record the best run.

    Reads ``X_train``, ``y_train``, ``X_val``, ``y_val`` and ``state`` from
    the enclosing scope. ``state.epoch`` is incremented on every call; when
    the score beats ``state.accuracy`` the score, the trained model and a
    copy of ``hyper_params`` are stored on ``state``.

    Args:
        hyper_params: Sequence ``(learning_rate, regularizer)``.

    Returns:
        Tuple ``(improved, final_accuracy)`` where ``final_accuracy`` is the
        smaller of the training and validation accuracy and ``improved``
        tells whether it exceeded the previous ``state.accuracy``.
    """
    learning_rate, regularizer = hyper_params

    svm = LinearSVM()
    svm.train(X_train, y_train, learning_rate=learning_rate,
              reg=regularizer, num_iters=1000)

    y_train_prediction = svm.predict(X_train)
    train_accuracy = np.mean(y_train == y_train_prediction)
    y_val_prediction = svm.predict(X_val)
    val_accuracy = np.mean(y_val == y_val_prediction)

    # Score with the worse of the two accuracies.
    final_accuracy = min(train_accuracy, val_accuracy)

    state.epoch += 1
    improved = state.accuracy < final_accuracy
    if improved:
        state.accuracy = final_accuracy
        state.svm = svm
        state.hyper = hyper_params[:]

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Epoch %2d: (%.8f, %f) -> %f %s" % (
        state.epoch, learning_rate, regularizer, final_accuracy,
        "(!)" if improved else ""))
    return improved, final_accuracy
def train(X_train, y_train, X_val, y_val):
    """Grid-search learning rate and regularization for a LinearSVM.

    One model is trained for 1000 iterations per (learning rate,
    regularization strength) pair. The training and validation accuracy of
    every pair is printed, followed by the best validation accuracy.

    Args:
        X_train: Training inputs passed to ``LinearSVM.train``/``predict``.
        y_train: Training labels compared element-wise with the predictions.
        X_val: Validation inputs.
        y_val: Validation labels.

    Returns:
        The LinearSVM instance with the highest validation accuracy.
    """
    # Use the validation set to tune hyperparameters (regularization strength
    # and learning rate). Experiment with different ranges; with care a
    # validation accuracy of about 0.4 should be reachable.
    learning_rates = [1e-7, 5e-6, 1e-6]
    regularization_strengths = [1e4, 5e4, 1e5]

    # Maps (learning_rate, regularization_strength) to
    # (training_accuracy, validation_accuracy); accuracy is the fraction of
    # correctly classified data points.
    results = {}
    best_val = -1   # Highest validation accuracy seen so far.
    best_svm = None  # Model that achieved best_val.

    for lr in learning_rates:
        for reg in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=reg,
                      num_iters=1000)

            y_train_pred = svm.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)

            results[(lr, reg)] = (train_accuracy, val_accuracy)
            if best_val < val_accuracy:
                best_val = val_accuracy
                best_svm = svm

    # Print out results. Parenthesised single-argument print behaves the same
    # on Python 2 and Python 3.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('Learning-rate=%f regularizer=%f train-accuracy=%f validation-accuracy=%f' % (
            lr, reg, train_accuracy, val_accuracy))

    print('Best validation accuracy achieved during cross-validation: %f' % best_val)
    return best_svm
def main():
    """Time both SVM loss implementations, then train and score a LinearSVM.

    Data comes from ``gen_train_val_test(49000, 1000, 1000)``. The naive and
    vectorized losses are evaluated on the training split with a random
    weight matrix and their run times and values printed. A LinearSVM is then
    trained and its train/validation/test accuracy reported.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)

    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01

    # time.clock() was removed in Python 3.8; time.time() is what the rest of
    # this function already uses, so these are wall-clock timings.
    start = time.time()
    loss, _ = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss, ))

    start = time.time()
    loss1, _ = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1, ))

    svm = LinearSVM()
    tic = time.time()
    svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
              num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc :{} Validation :{} Test :{}'.format(acc_train, acc_val, acc_test))
    # The reported duration covers training and the three evaluations above.
    toc = time.time()
    print('That took %fs' % (toc - tic))
# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the Frobenius norm of the difference is used
# to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, 'fro')
print('difference: %f' % (difference,))

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[37]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(
    X_train,
    y_train,
    learning_rate=1e-7,
    reg=2.5e4,
    num_iters=1500,
    verbose=True,
)
toc = time.time()
print('That took %fs' % (toc - tic,))

# In[38]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
# Time the loop-based loss on the development split.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

# Time the vectorized loss on the same inputs; its gradient is discarded.
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much
# faster.
print('difference: %f' % (loss_naive - loss_vectorized,))

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(
    X_train,
    y_train,
    learning_rate=1e-7,
    reg=2.5e4,
    num_iters=1500,
    verbose=True,
)
toc = time.time()
print('That took %fs' % (toc - tic,))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
# The loss is a scalar, so the two implementations can be compared directly.
# The gradient is a matrix, so the Frobenius norm of the difference is used
# to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, 'fro')
print('difference: %f' % (difference,))

# %% [markdown]
# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss. Your code for this part will be written inside `cs231n/classifiers/linear_classifier.py`.

# %%
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(
    X_train,
    y_train,
    learning_rate=1e-7,
    reg=2.5e4,
    num_iters=1500,
    verbose=True,
)
toc = time.time()
print('That took %fs' % (toc - tic,))

# %%
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('difference: %f' % difference)

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[6]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# In[7]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
# Time the vectorized implementation; only its gradient is kept.
tic = time.time()
_, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
# NOTE(review): the original indentation was lost; both plotting calls are
# assumed to be guarded by show_image - confirm against the full file.
if show_image != 0:
    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
def SVM(train_data, train_label, validation_data, validation_label, test_data, test_label):
    """Run the linear-SVM experiment: train, tune, evaluate and visualise.

    Steps, in order:
      1. Print the naive SVM loss for a random weight matrix.
      2. Train one LinearSVM, plot its loss history and print its training
         and validation accuracy.
      3. Grid-search learning rate and regularization strength, keeping the
         model with the highest validation accuracy.
      4. Scatter-plot training and validation accuracy over the grid.
      5. Print the best model's test accuracy and show its weights as one
         image per class.

    Args:
        train_data: Training inputs passed to the loss and to LinearSVM.
        train_label: Training labels.
        validation_data: Validation inputs.
        validation_label: Validation labels.
        test_data: Test inputs.
        test_label: Test labels.

    Returns:
        None. Results are printed and plotted.
    """
    W = np.random.randn(10, 3072) * 0.0001
    loss, _ = svm_loss_naive(W, train_data, train_label, 0.000005)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('loss: %f \n' % loss)

    # Disabled gradient check and naive-vs-vectorized timing, kept for
    # reference (this was an inert string literal in the original):
    #
    # f = lambda w: svm_loss_naive(w, train_data, train_label, 0.0)[0]
    # grad_numerical = grad_check_sparse(f, W, grad, 10)
    # loss, grad = svm_loss_naive(W, train_data, train_label, 5e1)
    # f = lambda w: svm_loss_naive(w, train_data, train_label, 5e1)[0]
    # grad_numerical = grad_check_sparse(f, W, grad, 10)
    # t1 = time.time()
    # loss_naive, grad_naive = svm_loss_naive(W, train_data, train_label, 0.000005)
    # t2 = time.time()
    # print('\nNaive Loss: %e computed in %fs' % (loss_naive, t2 - t1))
    # t1 = time.time()
    # loss_vectorized, grad_vectorized = svm_loss_vectorized(W, train_data, train_label, 0.000005)
    # t2 = time.time()
    # print('Vectorised loss and gradient: %e computed in %fs\n' % (loss_vectorized, t2 - t1))
    # difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    # print('difference: %f' % difference)

    from cs231n.classifiers import LinearSVM

    # Single training run with fixed hyper-parameters.
    svm = LinearSVM()
    t1 = time.time()
    loss_hist = svm.train(train_data, train_label, learning_rate=1e-7,
                          reg=5e4, num_iters=1000, verbose=True)
    t2 = time.time()
    print('That took %fs' % (t2 - t1))

    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    train_label_predict = svm.predict(train_data)
    print('Training accuracy: %f' % np.mean(train_label == train_label_predict))
    validation_label_predict = svm.predict(validation_data)
    print('Validation accuracy: %f' % np.mean(validation_label == validation_label_predict))

    # Hyper-parameter grid search; results maps
    # (learning_rate, regularization) -> (train_accuracy, val_accuracy).
    learning_rates = [1e-7, 2e-7, 5e-7, 1e-6]
    regularization_strengths = [1e4, 2e4, 5e4, 1e5, 5e5, 1e6]
    results = {}
    best_val = -1
    best_svm = None
    for learning in learning_rates:
        for regularization in regularization_strengths:
            svm = LinearSVM()
            svm.train(train_data, train_label, learning_rate=learning,
                      reg=regularization, num_iters=2000)

            train_label_predict = svm.predict(train_data)
            train_accuracy = np.mean(train_label_predict == train_label)
            print('Training accuracy: %f' % train_accuracy)

            validation_label_predict = svm.predict(validation_data)
            val_accuracy = np.mean(validation_label_predict == validation_label)
            print('Validation accuracy: %f' % val_accuracy)

            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm
            results[(learning, regularization)] = (
                train_accuracy, val_accuracy)

    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy %f' % (
            lr, reg, train_accuracy, val_accuracy))
    print('Best validation accuracy achieved during cross validation: %f ' % best_val)

    # Scatter plots over the grid; marker size encodes the accuracy.
    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]

    sz = [results[x][0] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 training accuracy')
    plt.show()

    sz = [results[x][1] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 validation accuracy')
    plt.show()

    # Final evaluation of the best model on the test split.
    y_test_pred = best_svm.predict(test_data)
    test_accuracy = np.mean(y_test_pred == test_label)
    print('Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

    # Visualise the learned weights, one image per class.
    # NOTE(review): the reshape requires W to hold exactly 10*32*32*3 values,
    # i.e. no bias entries - confirm against LinearSVM.
    print(best_svm.W.shape)
    w = best_svm.W[:, :]
    print(w.shape)
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    # range() replaces the Python 2-only xrange(); ten items either way.
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        # Rescale the weights to lie between 0 and 255 for display.
        wimg = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()