class DigitClassifier():
    """Digit classifier backed by a ``LinearSVM`` trained on MNIST.

    Constructing an instance loads MNIST through
    ``tf.keras.datasets.mnist.load_data()`` and trains the SVM right away,
    so construction performs the whole training run.
    """

    def __init__(self):
        self.svm = LinearSVM()
        self.fit()

    def fit(self, num_train=15000, learning_rate=1e-7, reg=2.5e4,
            num_iters=1500):
        """Train ``self.svm`` on the first ``num_train`` MNIST training images.

        Each image is flattened into one row and a constant column of ones
        is appended to every row before training.

        Args:
            num_train: Number of leading training samples to use.
            learning_rate: Forwarded to ``LinearSVM.train``.
            reg: Regularization strength forwarded to ``LinearSVM.train``.
            num_iters: Iteration count forwarded to ``LinearSVM.train``.

        Returns:
            Whatever ``LinearSVM.train`` returns (the original code named it
            ``loss_hist``), so callers can plot it if they wish.
        """
        mnist = tf.keras.datasets.mnist
        # Only the training split is needed; the test split is discarded.
        (X_train, y_train), _ = mnist.load_data()

        X_train = X_train[:num_train]
        y_train = y_train[:num_train]

        # Preprocessing: reshape the image data into rows, then append a
        # column of ones to every row.
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])

        tic = time.time()
        loss_hist = self.svm.train(X_train, y_train,
                                   learning_rate=learning_rate, reg=reg,
                                   num_iters=num_iters, verbose=True)
        toc = time.time()
        print('That took %fs' % (toc - tic))
        return loss_hist

    def predict(self, x):
        """Return ``self.svm.predict(x)`` unchanged.

        NOTE(review): training rows carry a trailing column of ones, so ``x``
        presumably has to be flattened and extended the same way by the
        caller -- confirm against ``LinearSVM.predict``.
        """
        return self.svm.predict(x)
# The loss is a single number, so it is easy to compare the values computed # by the two implementations. The gradient on the other hand is a matrix, so # we use the Frobenius norm (the square root of the sum of the squared elements) to compare them. #difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro') #print('difference: %f' % difference) ''' # In the file linear_classifier.py, implement SGD in the function # LinearClassifier.train() and then run it with the code below. from linear_classifier import LinearSVM svm = LinearSVM() tic = time.time() loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True) toc = time.time() print('That took %fs' % (toc - tic)) ''''' # A useful debugging strategy is to plot the loss as a function of # iteration number: plt.plot(loss_hist) plt.xlabel('Iteration number') plt.ylabel('Loss value') plt.show() ''' # Write the LinearSVM.predict function and evaluate the performance on both the # training and validation set
# almost exactly along all dimensions. f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 0.0)[0] grad_numerical = grad_check_sparse(f, W, grad) # do the gradient check once again with regularization turned on # you didn't forget the regularization gradient did you? loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 5e1) f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 5e1)[0] grad_numerical = grad_check_sparse(f, W, grad) # In the file linear_classifier.py, implement SGD in the function # LinearClassifier.train() and then run it with the code below. svm=LinearSVM() tic=time.time() loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True) toc=time.time() print('That tooks %fs'%(toc-tic)) """ # A useful debugging strategy is to plot the loss as a function of # iteration number: plt.plot(loss_hist) plt.xlabel('Iteration number') plt.ylabel('Loss value') plt.savefig('/home/hongyin/file/cs231n-assignment1/picFaster.jpg') """ # Write the LinearSVM.predict function and evaluate the performance on both the # training and validation set y_train_pred = svm.predict(X_train)
def cross_validation(X_train, y_train, X_val, y_val,
                     learning_rates=None, regularization_strengths=None,
                     num_iters=2000):
    """Grid-search learning rate and regularization strength for a LinearSVM.

    For every (learning rate, regularization strength) pair a fresh
    ``LinearSVM`` is trained on the training set and scored on both the
    training and the validation set. Accuracy is the fraction of samples
    whose predicted label equals the true label.

    Args:
        X_train: Training inputs passed to ``LinearSVM.train`` / ``predict``.
        y_train: Training labels.
        X_val: Validation inputs.
        y_val: Validation labels.
        learning_rates: Learning rates to try; defaults to ``[1e-7, 5e-5]``.
        regularization_strengths: Regularization strengths to try; defaults
            to ``[5e4, 1e5]``.
        num_iters: Training iterations per combination. Use a small value
            while developing and a larger one for the real search.

    Returns:
        A tuple ``(results, best_svm)``. ``results`` maps
        ``(learning_rate, regularization_strength)`` to
        ``(training_accuracy, validation_accuracy)``. ``best_svm`` is the
        trained model with the highest validation accuracy, or ``None`` when
        no combination was evaluated.
    """
    # `None` sentinels avoid mutable default arguments.
    if learning_rates is None:
        learning_rates = [1e-7, 5e-5]
    if regularization_strengths is None:
        regularization_strengths = [5e4, 1e5]

    results = {}
    best_val = -1    # The highest validation accuracy that we have seen so far.
    best_svm = None  # The LinearSVM object that achieved that accuracy.

    for lr in learning_rates:
        for rs in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=rs,
                      num_iters=num_iters)

            y_train_pred = svm.predict(X_train)
            acc_train = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            acc_val = np.mean(y_val == y_val_pred)

            results[(lr, rs)] = (acc_train, acc_val)
            # Strict comparison: on a tie the earlier model is kept.
            if best_val < acc_val:
                best_val = acc_val
                best_svm = svm

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
            lr, reg, train_accuracy, val_accuracy))

    print('best validation accuracy achieved during cross-validation: %f' % best_val)
    return results, best_svm
print('grad_numerical: ', grad_numerical) # do the gradient check once again with regularization turned on # you didn't forget the regularization gradient did you? svmDevObj2 = SVM(W, X_dev, y_dev, 5e1) loss, grad = svmDevObj2.svm_loss_naive() f = lambda w: svmDevObj2.svm_loss_naive()[0] grad_numerical = grad_check_sparse(f, W, grad) print('grad_numerical: ', grad_numerical) ##################### linearSVM = LinearSVM() tic = time.time() loss_hist = linearSVM.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True) toc = time.time() print('That took %fs' % (toc - tic)) # A useful debugging strategy is to plot the loss as a function of # iteration number: plt.plot(loss_hist) plt.xlabel('Iteration number') plt.ylabel('Loss value') plt.show() # Write the LinearSVM.predict function and evaluate the performance on both the # training and validation set y_train_pred = linearSVM.predict(X_train)
# Presumably the last statement of a loop that fills b1, mirroring the b2/b3
# loops below; the loop header is above this chunk.
b1[i] = a

# One 256-bin value histogram per test image.
b2 = np.zeros((test_num, 256))
for i in range(test_num):
    a = np.bincount(test_images[i].astype(int), minlength=256).reshape(1, -1)
    b2[i] = a

# One 256-bin value histogram per validation image. The source must be
# val_images: b3 has val_num rows, so reading test_images here would store
# test-set histograms under the validation name.
b3 = np.zeros((val_num, 256))
for i in range(val_num):
    a = np.bincount(val_images[i].astype(int), minlength=256).reshape(1, -1)
    b3[i] = a
#=================================================================================
# loss,grad=svm.svm_loss_naive(w,train_images,train_labels,reg)
# print(loss, grad)

# NOTE(review): the training call visible here uses train_images directly,
# not the b1/b2/b3 histograms -- confirm that is intended.
svm = LinearSVM()  # create the classifier object; W is still empty here (original author's note)
loss_hist = svm.train(train_images, train_labels, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)  # after this call the svm object holds W (original author's note)
y_train_pred = svm.predict(train_images)
print('training accuracy: %f' % (np.mean(train_labels == y_train_pred)))
# y_val_pred = svm.predict(val_images)
# print('validation accuracy: %f'%(np.mean(val_labels==y_val_pred)))

# Hyperparameter tuning (cross-validation)
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# Two list comprehensions of six values each: 12 strengths in total.
regularization_strengths = [(1 + i * 0.1) * 1e4 for i in range(-3, 3)] + [(2 + i * 0.1) * 1e4 for i in range(-3, 3)]
results = {}  # dictionary
# tune hyperparameters
# Grid search over learning rate, regularization strength and iteration
# count. Each candidate list currently holds a single value.
learningRates = [1e-7]
regularization = [5e3]
# iteration = [3000, 4000, 5000, 6000, 7000, 8000]
iteration = [6000]

bestParams = []   # [eta, r, t] of the best model seen so far
bestValAcc = 0    # highest validation accuracy seen so far
bestSvm = None    # model that achieved bestValAcc
for eta in learningRates:
    for r in regularization:
        for t in iteration:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=eta, reg=r,
                      num_iters=t, verbose=True)
            y_train_pred = svm.predict(X_train)
            y_val_pred = svm.predict(X_val)
            # Accuracy = fraction of predictions equal to the true label.
            trainAcc = np.mean(y_train == y_train_pred)
            valAcc = np.mean(y_val == y_val_pred)
            # Single-argument print(...) gives the same output on Python 2
            # and is valid on Python 3.
            print('iteration: %d train accuracy: %.4f val accuracy: %.4f' % (
                t, trainAcc, valAcc))
            # Strict comparison: on a tie the earlier model is kept.
            if valAcc > bestValAcc:
                bestParams = [eta, r, t]
                bestValAcc = valAcc
                bestSvm = svm
print('best validation accuracy achieved: %.4f' % bestValAcc)
print(bestParams)
# (learning_rate, regularization_strength) to tuples of the form # (training_accuracy, validation_accuracy). The accuracy is simply the fraction # of data points that are correctly classified. results = {} # The highest validation accuracy that we have seen so far. best_val = -1 # The LinearSVM object that achieved the highest validation rate. best_svm = None # lr = learning rate , reg = regularization_strength for lr in learning_rates: for reg in regularization_strengths: # new a svm svm = LinearSVM() # train with training set svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=500) # get training set accuracy y_train_pred = svm.predict(X_train) training_accuracy = np.mean(y_train_pred == y_train) #print('Training set accuracy is %f' % (training_accuracy,)) # get validation set accuracy y_val_pred = svm.predict(X_val) validation_accuracy = np.mean(y_val_pred == y_val) #print('Validation set accuracy is %f' % (validation_accuracy,)) # store the results results[(lr, reg)] = (training_accuracy, validation_accuracy) if validation_accuracy > best_val: best_val = validation_accuracy best_svm = svm """ for lr, reg in sorted(results):
# grad_check_sparse(f, W, grad) # time_start = time.time() # loss_naive, gradient_navie = svm_loss_naive(W, X_dev, y_dev, 5e-6) # time_end = time.time() # print ('Naive loss: ', loss_naive, ' use time: ', time_end - time_start) # # time_start = time.time() # loss_vector, gradient_vector = svm_loss_vectorized(W, X_dev, y_dev, 5e-6) # time_end = time.time() # print ('Vector loss: ', loss_vector, ' use time: ', time_end - time_start) # print ('different loss: ', loss_vector - loss_naive) from linear_classifier import LinearSVM svm = LinearSVM() time_start = time.time() loss_histroy = svm.train(X_train, y_train, learning_rate=1.5e-7, reg=3.25e4, num_iters=1500, batch_size=5000, verbose=True) time_end = time.time() print ('train take time: ', time_end - time_start) # 将损失和循环次数画出来,有利于debug plt.plot(loss_histroy) plt.xlabel('Iteration number') plt.ylabel('Loss value') plt.show() y_val_pred = svm.predict(X_val) print ('accuracy: %f' % (np.mean(y_val_pred == y_val)))
print('Vectorized loss and gradient: computed in %fs' % (toc - tic)) # The loss is a single number, so it is easy to compare the values computed # by the two implemetations. The gradient on the other hand is a matrix, so # we use the Frobenius norm to compare them. diffrence = np.linalg.norm(grad_naive - grad_vectorized, ord='fro') print('difference: %f' % diffrence) # ### Stochastic Gradient Descent # # We now have vectorized and efficient expressions for the loss, the gradient and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss. svm = LinearSVM() tic = time.time() loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True) toc = time.time() print('That took %fs ' % (toc - tic)) # A useful debugging strategy is to plot the loss as a function of # iteration number: plt.plot(loss_hist) plt.xlabel('Iteration number') plt.ylabel('Loss value') plt.show() # Write the LinearSVM.predict function and evaluate the performance on both the # training and validation set
learning_rates = [5e-8, 1e-7, 5e-7, 1e-6] regularization_strengths = [1e3, 5e3, 1e4, 5e4, 1e5, 5e5] results = {} # The highest validation accuracy that we have seen so far best_val = -1 # The LinearSVM object that achieved the highest validation rate best_svm = None for strength in regularization_strengths: for rate in learning_rates: svm = LinearSVM() svm.train(x_train, y_train, learning_rate=rate, reg=strength, num_iters=1500, verbose=True) y_train_pred = svm.predict(x_train) train_accuracy = np.mean(y_train == y_train_pred) y_valid_pred = svm.predict(x_val) val_accuracy = np.mean(y_val == y_valid_pred) results[(rate, strength)] = (train_accuracy, val_accuracy) if val_accuracy > best_val: best_val = val_accuracy best_svm = svm for lr, reg in sorted(results): train_accuracy, val_accuracy = results[(lr, reg)] print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
# Grid search over learning rate and regularization strength.
# train_data, train_labels, val_data and val_labels come from code above
# this chunk.
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# The offsets get their own name: binding them to `range` would shadow the
# builtin and break every later range(...) call.
offsets = range(-3, 3)
regularization_strengths = ([(1 + i * 0.1) * 1e4 for i in offsets]
                            + [(2 + 0.1 * i) * 1e4 for i in offsets])

# results maps (lr, rs) -> (train_accuracy, val_accuracy).
results = {}
best_val = -1    # highest validation accuracy seen so far
best_svm = None  # model that achieved best_val
for rs in regularization_strengths:
    for lr in learning_rates:
        svm = LinearSVM()
        loss_hist = svm.train(train_data, train_labels, lr, rs, num_iters=3000)
        train_labels_pred = svm.predict(train_data)
        train_accuracy = np.mean(train_labels == train_labels_pred)
        val_labels_pred = svm.predict(val_data)
        val_accuracy = np.mean(val_labels == val_labels_pred)
        # Strict comparison: on a tie the earlier model is kept.
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_svm = svm
        results[(lr, rs)] = train_accuracy, val_accuracy
        # Single-argument print(...) gives the same output on Python 2 and
        # is valid on Python 3.
        print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
            lr, rs, train_accuracy, val_accuracy))

for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]