import numpy as np
from linear_classifier import Softmax


def cross_validation(X_train, y_train, X_val, y_val):
    """
    Use the validation set to tune hyperparameters (regularization strength
    and learning rate). You should experiment with different ranges for the
    learning rates and regularization strengths; if you are careful, you
    should be able to get a classification accuracy of over 0.35 on the
    validation set.
    """
    results = {}
    best_val = -1
    best_softmax = None
    learning_rates = [1e-7, 5e-7]
    regularization_strengths = [2.5e4, 5e4]
    iters = 1500

    for lr in learning_rates:
        for rs in regularization_strengths:
            softmax = Softmax()
            softmax.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

            Tr_pred = softmax.predict(X_train)
            acc_train = np.mean(y_train == Tr_pred)
            Val_pred = softmax.predict(X_val)
            acc_val = np.mean(y_val == Val_pred)

            results[(lr, rs)] = (acc_train, acc_val)
            if best_val < acc_val:
                best_val = acc_val
                best_softmax = softmax

    # Print out results
    for lr, rs in sorted(results):
        train_accuracy, val_accuracy = results[(lr, rs)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f'
              % (lr, rs, train_accuracy, val_accuracy))
    print('best validation accuracy achieved during cross-validation: %f' % best_val)

    return best_softmax
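# A minimal usage sketch for the function above. It assumes the CIFAR-10 splits
# (X_train, y_train, X_val, y_val, X_test, y_test) have already been loaded,
# flattened, and bias-augmented as in the later snippets; the variable names are
# assumptions, not part of the original code.
best_softmax = cross_validation(X_train, y_train, X_val, y_val)
y_test_pred = best_softmax.predict(X_test)
print('test accuracy: %f' % np.mean(y_test == y_test_pred))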
# Add a bias dimension to the validation and dev data.
val_data = np.hstack([val_data, np.ones((val_data.shape[0], 1))])
dev_data = np.hstack([dev_data, np.ones((dev_data.shape[0], 1))])

learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
offsets = range(-3, 3)
regularization_strengths = ([(1 + 0.1 * i) * 1e4 for i in offsets]
                            + [(2 + 0.1 * i) * 1e4 for i in offsets])

results = {}
best_val = -1
best_softmax = None

for rs in regularization_strengths:
    for lr in learning_rates:
        softmax = Softmax()
        loss_hist = softmax.train(train_data, train_labels, lr, rs, num_iters=3000)

        train_labels_pred = softmax.predict(train_data)
        train_accuracy = np.mean(train_labels == train_labels_pred)
        val_labels_pred = softmax.predict(val_data)
        val_accuracy = np.mean(val_labels == val_labels_pred)

        if val_accuracy > best_val:
            best_val = val_accuracy
            best_softmax = softmax
        results[(lr, rs)] = (train_accuracy, val_accuracy)

        print('lr %e reg %e train accuracy: %f val accuracy: %f'
              % (lr, rs, train_accuracy, val_accuracy))
# Check the vectorized loss and gradient, with regularization.
loss_vec, grad_vec = softmax_loss_vec(W, X_dev, Y_dev, 1e2)
f = lambda w: softmax_loss_vec(w, X_dev, Y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad_vec)

# Implement SGD and find the best hyperparameters.
from linear_classifier import Softmax

results = {}
best_val = -1
best_softmax = None
learning_rates = [10 ** (x * 2) for x in range(-3, 3)]
reg_strengths = [10 ** (x * 2) for x in range(-3, 3)]

for lr in learning_rates:
    for reg in reg_strengths:
        sm = Softmax()
        sm.train(X_train, Y_train, learning_rate=lr, reg=reg, num_iters=1500)

        y_train_pred = sm.predict(X_train)
        y_val_pred = sm.predict(X_val)
        train_acc = np.mean(y_train_pred == Y_train)
        val_acc = np.mean(y_val_pred == Y_val)
        results[(lr, reg)] = (train_acc, val_acc)

        if val_acc > best_val:
            best_val = val_acc
            best_softmax = sm

for lr, reg in sorted(results):
    train_acc, val_acc = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f'
          % (lr, reg, train_acc, val_acc))
import time

# Initialize a small random weight matrix.
W = np.random.randn(3073, 10) * 0.0001
loss, gradient = softmax_loss_naive(W, X_dev, y_dev, 0.0)
print('loss: %f' % loss)
# The loss should be close to -np.log(0.1): after scaling by 0.0001 the weights are
# near zero, so the exponentiated scores are all close to 1. With ten classes each
# gets roughly equal probability, so p ≈ 0.1 and the expected loss is -np.log(0.1).
print('sanity check: %f' % (-np.log(0.1)))

# Compare the naive and vectorized implementations.
time_start = time.time()
loss_naive, gradient_naive = softmax_loss_naive(W, X_train, y_train, 5e-6)
time_end = time.time()
print('naive loss took %f seconds - and loss is: %f' % (time_end - time_start, loss_naive))

time_start = time.time()
loss_vector, gradient_vector = softmax_loss_vectorized(W, X_train, y_train, 5e-6)
time_end = time.time()
print('vectorized loss took %f seconds - and loss is: %f' % (time_end - time_start, loss_vector))

gradient_diff = np.linalg.norm(gradient_naive - gradient_vector, ord='fro')
print('loss difference: ', np.abs(loss_vector - loss_naive))
print('gradient difference: ', gradient_diff)

# Train a Softmax classifier and evaluate it on the test set.
from linear_classifier import Softmax
softmax = Softmax()
softmax.train(X_train, y_train, learning_rate=1.67e-8, reg=1e-2,
              num_iters=1500, verbose=True, method=0)
y_test_pred = softmax.predict(X_test)
test_accuracy = np.mean(y_test_pred == y_test)
print('final test accuracy: ', test_accuracy)
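# The sanity-check reasoning in the comment above can be verified directly with a
# few lines of NumPy. This is a standalone sketch; the score scale of 1e-3 is an
# illustrative assumption, not a value from the original code.
import numpy as np

scores = np.random.randn(10) * 1e-3                 # scores close to 0
probs = np.exp(scores) / np.sum(np.exp(scores))     # softmax probabilities
print(probs)                                        # each entry is close to 0.1
print(-np.log(probs[0]))                            # close to -np.log(0.1) = 2.3026...
print(-np.log(0.1))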
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = load_data(
    cifar_dir, num_test=500)

# Initialize W.
W = np.random.randn(3073, 10) * 0.0001

# Test the loss.
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)
# print('loss: %f' % loss)
# print('sanity check: %f' % (-np.log(0.1)))

# Test the gradient without regularization.
# def f(w): return softmax_loss_vectorized(w, X_dev, y_dev, 0.0)[0]
# grad_numerical = grad_check_sparse(f, W, grad, 10)

# Test the gradient with regularization.
# def f(w): return softmax_loss_vectorized(w, X_dev, y_dev, 1e2)[0]
# grad_numerical = grad_check_sparse(f, W, grad, 10)

softmax = Softmax()
loss_history = softmax.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                             num_iters=1500, verbose=True)
plt.plot(loss_history)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()
# Find the best learning rate and regularization strength.
learning_rates = [1e-7, 5e-7, 1e-6, 5e-6]
regularization_strengths = [1e-4, 5e-4, 1e-5, 5e-5]

results = {}
# The highest validation accuracy that we have seen so far.
best_val = -1
# The Softmax object that achieved the highest validation accuracy.
best_softmax = None

for strength in regularization_strengths:
    for rate in learning_rates:
        smax = Softmax()
        smax.train(x_train, y_train, learning_rate=rate, reg=strength,
                   num_iters=1500, verbose=True)

        y_train_pred = smax.predict(x_train)
        train_accuracy = np.mean(y_train == y_train_pred)
        y_valid_pred = smax.predict(x_val)
        val_accuracy = np.mean(y_val == y_valid_pred)

        results[(rate, strength)] = (train_accuracy, val_accuracy)
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_softmax = smax
# Tail of a grad_check_sparse-style check: centered difference at a randomly
# chosen index ix with step size h, compared against the analytic gradient.
oldval = x[ix]
x[ix] = oldval + h
fxph = f(x)                      # f(x + h)
x[ix] = oldval - h
fxmh = f(x)                      # f(x - h)
x[ix] = oldval                   # restore the original value

grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = abs(grad_numerical - grad_analytic) / (
    abs(grad_numerical) + abs(grad_analytic))
print('numerical: %f analytic: %f, relative error: %e'
      % (grad_numerical, grad_analytic, rel_error))

# Now check the gradient with the regularization term included.
loss, grad = softmax.softmax_loss_vectorized(w, x_dev, y_dev, 1e2)
f = lambda w: softmax.softmax_loss_vectorized(w, x_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, w, grad)

softmax = Softmax()              # create the classifier object; W is still None at this point
tic = time.time()
loss_hist = softmax.train(x_train, y_train, learning_rate=1e-7, reg=2.5e4,
                          num_iters=1500, verbose=True)   # after training the object holds W
toc = time.time()
print('that took %fs' % (toc - tic))

plt.plot(loss_hist)
plt.xlabel('iteration number')
plt.ylabel('loss value')
plt.show()

# After training, use the learned parameters to predict and compute accuracy.
y_train_pred = softmax.predict(x_train)
results = {}
best_val = -1
best_softmax = None
learning_rates = [1e-7, 5e-7]
regularization_strengths = ([(1 + 0.1 * i) * 1e4 for i in range(-3, 4)]
                            + [(5 + 0.1 * i) * 1e4 for i in range(-3, 4)])

################################################################################
# TODO:                                                                        #
# Use the validation set to set the learning rate and regularization strength.#
# This should be identical to the validation that you did for the SVM; save   #
# the best trained softmax classifier in best_softmax.                        #
################################################################################
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        softmax_tmp = Softmax()
        softmax_tmp.train(X_train, y_train, learning_rate, regularization_strength,
                          num_iters=2000)

        y_train_pred = softmax_tmp.predict(X_train)
        tmp_train = np.mean(y_train == y_train_pred)
        print('training accuracy: %f' % tmp_train)

        y_val_pred = softmax_tmp.predict(X_val)
        tmp_val = np.mean(y_val == y_val_pred)
        print('validation accuracy: %f' % tmp_val)

        results[(learning_rate, regularization_strength)] = (tmp_train, tmp_val)
        if tmp_val > best_val:
            best_val = tmp_val
            best_softmax = softmax_tmp
# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
results = {}
best_val = -1
best_softmax = None
learning_rates = np.logspace(-10, 10, 10)
regularization_strengths = np.logspace(-3, 6, 10)
iters = 100

for lr in learning_rates:
    for rs in regularization_strengths:
        softmax = Softmax()
        softmax.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

        y_train_pred = softmax.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = softmax.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)

        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_softmax = softmax

y_test_pred = best_softmax.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
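# A short reporting step could follow here, in the spirit of the earlier snippets.
# It assumes the results, best_val, and test_accuracy variables produced by the
# block above; the print format mirrors the grid-search output used previously.
for lr, rs in sorted(results):
    train_accuracy, val_accuracy = results[(lr, rs)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f'
          % (lr, rs, train_accuracy, val_accuracy))
print('best validation accuracy achieved during cross-validation: %f' % best_val)
print('final test set accuracy: %f' % test_accuracy)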