def ridge_bin_class(X, w, w0, thresh=0.5):
    """
    Use a ridge regression model computed in fit_ridge as a binary
    classifier: if w dot x >= thresh, return 1, else return 0 for that
    element.

    Parameters
    ----------
    X : array (n x d)
        Data array (n samples, d features)
    w : vector (d x 1)
        linear weight vector
    w0 : float
        constant offset term
    thresh : float (optional)
        classification threshold

    Returns
    -------
    y_hat_class : vector (n x 1)
        binary class predictions
    """
    # Evaluate model, return predictions according to threshold
    y_hat = ru.linear_model(X, w, w0)
    y_hat_class = np.zeros_like(y_hat)
    y_hat_class[y_hat >= thresh] = 1

    return y_hat_class
# end function
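# A minimal usage sketch for ridge_bin_class on toy data (not part of the
# original module). It assumes ri.fit_ridge and ru.loss_01 behave as they
# are used elsewhere in this file; the toy arrays and the helper name
# _demo_ridge_bin_class are purely illustrative.
def _demo_ridge_bin_class():
    rng = np.random.RandomState(0)
    X_toy = rng.randn(100, 5)                    # hypothetical data
    y_toy = (X_toy[:, 0] > 0).astype(float)      # hypothetical 0/1 labels

    w0, w = ri.fit_ridge(X_toy, y_toy, lam=1.0)  # fit ridge regression
    y_pred = ridge_bin_class(X_toy, w, w0, thresh=0.5)
    print("0-1 loss on toy data:", ru.loss_01(y_toy, y_pred))
# end function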
def mnist_ridge_thresh(X, y, lam=1):
    """
    Find the best w dot x threshold for labeling an MNIST digit as a 2.
    For the purposes of this classification, 2 -> 1, while everything
    else -> 0. Computed simply by taking the median of the predictions
    corresponding to the indices of 1s in the truth vector (only use on
    training data!).

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : vector (n x 1)
        labels
    lam : float
        regularization constant

    Returns
    -------
    thresh_best : float
        optimal threshold for picking out 2s
    """
    # Fit model
    w0, w = ri.fit_ridge(X, y, lam=lam)

    # Predict
    y_hat = ru.linear_model(X, w, w0)

    # Mask where 2s occur in truth
    mask = (y == 1)

    # Take median of corresponding predicted values
    thresh_best = np.median(y_hat[mask])

    return thresh_best
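# A small numpy-only sketch of the median-threshold heuristic used above
# (standalone and illustrative): the classification threshold is the median
# of the model's predictions over the true positives. The toy y_hat and y
# vectors below are hypothetical.
def _demo_median_thresh():
    y_hat = np.array([0.2, 0.8, 0.6, 0.1, 0.9])  # toy predictions
    y = np.array([0, 1, 1, 0, 1])                # toy 0/1 truth
    thresh = np.median(y_hat[y == 1])            # median over true 2s
    print("median threshold:", thresh)           # -> 0.8
# end function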
def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    Find the best regularization constant lambda for labeling an MNIST
    digit as a 2. For the purposes of this classification, 2 -> 1, while
    everything else -> 0. Optimize over lam by minimizing the 0-1 loss.

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : vector (n x 1)
        labels
    num : int
        number of lambda gridpoints to search over
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        optimal regularization constant for picking out 2s
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        threshold corresponding to the best lambda
    """
    # Make array of lambdas, thresholds
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Loop over lambdas, evaluate model, see which is best
    for ii in range(len(lams)):
        print("Iteration, lambda:", ii, lams[ii])

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lams[ii])

        # Predict
        y_hat = ru.linear_model(X, w, w0)

        # Compute threshold as median of predicted rows corresponding to twos
        mask = (y == 1)
        thresh[ii] = np.median(y_hat[mask])

        # Classify, then get 0-1 error
        y_hat = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_hat))
        print("Predicted threshold:", thresh[ii])

        # Save 0-1 loss to minimize over lambda
        loss[ii] = ru.loss_01(y, y_hat)
        print("0-1 Loss:", loss[ii])

    # Get best lambda (min 0-1 loss on training set)
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()
    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Plot best fit
    plt.axvline(x=lams[best_ind], ymin=-100, ymax=100,
                linewidth=3, color='k', ls="--")

    ax.set_xscale("log")
    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
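# Standalone, illustrative sketch of the grid-search pattern mnist_ridge_lam
# uses: evaluate a loss on a log-spaced lambda grid, then take the argmin.
# The quadratic toy loss below stands in for the 0-1 loss of real fits.
def _demo_lambda_grid():
    lams = np.logspace(-10, 10, 20)      # log-spaced lambda grid
    loss = (np.log10(lams) - 2.0) ** 2   # toy loss, minimized near 1e2
    best_ind = np.argmin(loss)
    print("best lambda:", lams[best_ind])
# end function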
# Find minimum threshold, lambda from minimum validation error
ind_t, ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
best_lambda = lams[ind_l]
best_thresh = thresh_arr[ind_t]
print("Best lambda:", best_lambda)
print("Best threshold:", best_thresh)

plt.plot(lams, err_val[ind_t, :])
plt.show()

# Fit training set for model parameters using best-fit lambda
w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)

# Predict, then get square loss, 0-1 error on training data
y_hat_train = ru.linear_model(X_train, w, w0)
y_hat_train_class = ri.ridge_bin_class(X_train, w, w0, thresh=best_thresh)
sl_train = val.square_loss(y_train_true, y_hat_train)
err_10_train = val.loss_01(y_train_true, y_hat_train_class)

# Load testing set
print("Loading MNIST Testing data...")
X_test, y_test = mu.load_mnist(dataset='testing')
y_test_true = mnist_two_filter(y_test)
print("True number of twos in testing set:", np.sum(y_test_true))

# Predict, then get square loss, 0-1 error on testing data
y_hat_test = ru.linear_model(X_test, w, w0)
y_hat_test_class = ri.ridge_bin_class(X_test, w, w0, thresh=best_thresh)
sl_test = val.square_loss(y_test_true, y_hat_test)
err_10_test = val.loss_01(y_test_true, y_hat_test_class)
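# Hedged sketch of how the err_val grid indexed above could have been built
# earlier in this script (its actual construction is not shown in this
# excerpt): rows index thresholds, columns index lambdas, each cell holding
# the 0-1 validation error for that (thresh, lam) pair. The loop below is an
# assumption that mirrors the indexing err_val[ind_t, ind_l]; X_val and
# y_val_true are hypothetical validation-set names.
#
# err_val = np.zeros((len(thresh_arr), len(lams)))
# for jj in range(len(lams)):
#     w0, w = ri.fit_ridge(X_train, y_train_true, lam=lams[jj])
#     for ii in range(len(thresh_arr)):
#         y_hat_val = ri.ridge_bin_class(X_val, w, w0, thresh=thresh_arr[ii])
#         err_val[ii, jj] = val.loss_01(y_val_true, y_hat_val)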
plt.figure()
plt.title(site + '_' + runclass + '_' + 'pupil')
bins = int(len(np.mean(p, 0).flatten()) / 2)
out = plt.hist(np.mean(p, 0).flatten(), bins=bins, color='green')

# --- TODO --- come up with a good way to divide into big/small
#count = out[0]
#pup_val = out[1]
#minima = pup_val[ss.argrelextrema(count, np.less, order=5)]
#plt.axvline(minima, color='k')

# ===================== Send data off for analysis ===========================

# call linear model function (regression between r and variable set of
# predictors)
from regression_utils import linear_model
pred, rsq = linear_model(r, p)
r_no_p = (r.reshape(bincount * stimcount * repcount, cellcount)
          - pred).reshape(bincount, repcount, stimcount, cellcount)
#r = r_no_p

if reduce_method == 'PCA':
    pcs, var, step, loading = PCA(r, trial_averaged=False, center=True)

## ===========================================================================
'''
# Filtering pupil...
p_long = p.transpose(1, 2, 0).flatten()

from scipy.signal import butter, lfilter, freqz

def butter_lowpass(cutoff, fs, order=5):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
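    # (The commented-out sketch above is cut off mid-function; the standard
    #  scipy.signal Butterworth low-pass recipe it follows would continue as
    #  below. This completion is assumed from the usual pattern, not
    #  recovered from the original file.)
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a

def butter_lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    return lfilter(b, a, data)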
    # Now save it
    np.savez(w_cache, w_0=w_0, w=w)
else:
    print("Loading training set fit from cache...")
    res = np.load(w_cache)
    w_0 = res["w_0"]
    w = res["w"]

r2_train = ru.r_squared(X_train, y_train, w, w_0, sparse=True)
print("r^2 on the training set: %.3lf" % r2_train)

r2_val = ru.r_squared(X_val, y_val, w, w_0, sparse=True)
print("r^2 on the validation set: %.3lf" % r2_val)

# Compute error on testing set
y_hat_test = ru.linear_model(X_test, w, w_0, sparse=True)
RMSE_test = val.RMSE(y_test, y_hat_test)
r2_test = ru.r_squared(X_test, y_test, w, w_0, sparse=True)
print("RMSE on the testing set: %.2lf" % RMSE_test)
print("r^2 on the testing set: %.3lf" % r2_test)

# Inspect solution: output the top 20 weights in magnitude and their
# corresponding feature names
sorted_w_args = np.array(np.fabs(w).flatten()).argsort()[::-1][:20]
for i in range(20):
    print("Feature: %s, weight: %.2lf" % (featureNames[sorted_w_args[i]],
                                          w[sorted_w_args[i]]))

###############################
#
# Upvote section
#