def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    Find the best regularization constant lambda for labeling a MNIST digit
    as a 2 by minimizing the 0-1 loss over a log-spaced grid of lambdas.

    For the purposes of this classification, 2 -> 1, while everything else
    -> 0.  For each lambda on the grid, a ridge model is fit on (X, y), the
    decision threshold is set to the median raw prediction over the rows
    whose label equals 1, and the 0-1 loss of the thresholded predictions is
    recorded.  Progress is printed for every grid point, and a plot of loss
    versus lambda (with the best lambda marked) is saved to "sl_lam.pdf".

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : array
        Binary labels; rows equal to 1 are treated as twos.
        (Presumably n x 1 to match X -- TODO confirm against callers.)
    num : int
        number of lambda gridpoints to search over
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        lambda that gives the minimum 0-1 loss
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        classification threshold that was used with lam_best
    """

    # Make arrays of lambdas and of the per-lambda thresholds and losses
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Rows that are true twos; independent of lambda, so compute it once
    mask = (y == 1)

    # Loop over lambdas, evaluate model, see which is best
    for ii, lam in enumerate(lams):
        print("Iteration, lambda:", ii, lam)

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lam)

        # Raw (continuous) predictions
        y_pred = ru.linear_model(X, w, w0)

        # Threshold is the median prediction over the rows that are twos
        thresh[ii] = np.median(y_pred[mask])

        # Classify with that threshold
        y_class = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_class))
        print("Predicted threshold:", thresh[ii])

        # 0-1 loss is the quantity minimized over lambda
        loss[ii] = ru.loss_01(y, y_class)
        print("0-1 Loss:", loss[ii])

    # Grid point with the minimum 0-1 loss on the supplied data
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()

    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Mark the best lambda.  axvline's ymin/ymax are axes fractions in
    # [0, 1]; the defaults already span the full height of the axes, and
    # drawing on `ax` avoids depending on pyplot's "current axes" state.
    ax.axvline(x=lams[best_ind], linewidth=3, color='k', ls="--")

    ax.set_xscale("log")

    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    Find the best regularization constant lambda for labeling a MNIST digit
    as a 2 by minimizing the 0-1 loss over a log-spaced grid of lambdas.

    For the purposes of this classification, 2 -> 1, while everything else
    -> 0.  For each lambda on the grid, a ridge model is fit on (X, y), the
    decision threshold is set to the median raw prediction over the rows
    whose label equals 1, and the 0-1 loss of the thresholded predictions is
    recorded.  Progress is printed for every grid point, and a plot of loss
    versus lambda (with the best lambda marked) is saved to "sl_lam.pdf".

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : array
        Binary labels; rows equal to 1 are treated as twos.
        (Presumably n x 1 to match X -- TODO confirm against callers.)
    num : int
        number of lambda gridpoints to search over
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        lambda that gives the minimum 0-1 loss
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        classification threshold that was used with lam_best
    """

    # Make arrays of lambdas and of the per-lambda thresholds and losses
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Rows that are true twos; independent of lambda, so compute it once
    mask = (y == 1)

    # Loop over lambdas, evaluate model, see which is best
    for ii, lam in enumerate(lams):
        print("Iteration, lambda:", ii, lam)

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lam)

        # Raw (continuous) predictions
        y_pred = ru.linear_model(X, w, w0)

        # Threshold is the median prediction over the rows that are twos
        thresh[ii] = np.median(y_pred[mask])

        # Classify with that threshold
        y_class = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_class))
        print("Predicted threshold:", thresh[ii])

        # 0-1 loss is the quantity minimized over lambda
        loss[ii] = ru.loss_01(y, y_class)
        print("0-1 Loss:", loss[ii])

    # Grid point with the minimum 0-1 loss on the supplied data
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()

    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Mark the best lambda.  axvline's ymin/ymax are axes fractions in
    # [0, 1]; the defaults already span the full height of the axes, and
    # drawing on `ax` avoids depending on pyplot's "current axes" state.
    ax.axvline(x=lams[best_ind], linewidth=3, color='k', ls="--")

    ax.set_xscale("log")

    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
# Locate the (threshold, lambda) grid cell with the lowest validation error.
# err_val is indexed [threshold, lambda], as the unravelled indices are used
# below to index thresh_arr and lams respectively.
ind_t, ind_l = np.unravel_index(np.argmin(err_val), err_val.shape)
best_thresh = thresh_arr[ind_t]
best_lambda = lams[ind_l]
print("Best lambda:", best_lambda)
print("Best threshold:", best_thresh)

# Show validation error versus lambda along the best-threshold row
plt.plot(lams, err_val[ind_t])
plt.show()

# Refit on the training set with the selected lambda
w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)


def _score_split(X_split, y_true, weights, offset, cutoff):
    """Return (raw predictions, class predictions, square loss, 0-1 loss)."""
    raw = ru.linear_model(X_split, weights, offset)
    labels = ri.ridge_bin_class(X_split, weights, offset, thresh=cutoff)
    return raw, labels, val.square_loss(y_true, raw), val.loss_01(y_true, labels)


# Square loss and 0-1 error on the training data
y_hat_train, y_hat_train_class, sl_train, err_10_train = _score_split(
    X_train, y_train_true, w, w0, best_thresh)

# Load the testing set and convert its labels to the binary "is a 2" target
print("Loading MNIST Testing data...")
X_test, y_test = mu.load_mnist(dataset='testing')
y_test_true = mnist_two_filter(y_test)
print("True number of twos in testing set:", np.sum(y_test_true))

# Square loss and 0-1 error on the testing data, using the same fitted model
y_hat_test, y_hat_test_class, sl_test, err_10_test = _score_split(
    X_test, y_test_true, w, w0, best_thresh)
# Pick the threshold/lambda pair at the minimum of the validation-error grid
# (first axis of err_val indexes thresh_arr, second axis indexes lams).
ind_t, ind_l = np.unravel_index(np.argmin(err_val), err_val.shape)
best_thresh = thresh_arr[ind_t]
best_lambda = lams[ind_l]
print("Best lambda:", best_lambda)
print("Best threshold:", best_thresh)

# Validation error as a function of lambda at the chosen threshold
plt.plot(lams, err_val[ind_t])
plt.show()

# Refit the ridge model on the training set with the chosen lambda
w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)

# Training data: raw predictions feed the square loss, thresholded
# predictions feed the 0-1 error
y_hat_train = ru.linear_model(X_train, w, w0)
y_hat_train_class = ri.ridge_bin_class(
    X_train, w, w0, thresh=best_thresh
)
sl_train = val.square_loss(y_train_true, y_hat_train)
err_10_train = val.loss_01(y_train_true, y_hat_train_class)

# Bring in the held-out testing set and binarize its labels
print("Loading MNIST Testing data...")
X_test, y_test = mu.load_mnist(dataset='testing')
y_test_true = mnist_two_filter(y_test)
print("True number of twos in testing set:", np.sum(y_test_true))

# Testing data: same model and threshold as above
y_hat_test = ru.linear_model(X_test, w, w0)
y_hat_test_class = ri.ridge_bin_class(
    X_test, w, w0, thresh=best_thresh
)
sl_test = val.square_loss(y_test_true, y_hat_test)
err_10_test = val.loss_01(y_test_true, y_hat_test_class)