Example #1
def ridge_bin_class(X, w, w0, thresh=0.5):
    """
    Use a ridge regression model computed in fit_ridge to use as a binary
    classifier.  In this case, if w dot x >= threshold, return 1, else return
    0 for that element

    Parameters
    ----------
    X : array (n x d)
        features array (d features, n samples)
    y : vector (n x 1)
        labels
    lam : float (optional)
        regularization constant
    thresh : float (optional)
        classification threshold

    Returns
    -------
    w0 : float
        Constant offset term
    w : vector (d x 1)
        linear weight vector
    y : vector (n x 1)
        predictions
    """

    # Evaluate model, return predictions according to threshold
    y_hat = ru.linear_model(X, w, w0)
    y_hat_class = np.zeros_like(y_hat)
    y_hat_class[y_hat >= thresh] = 1

    return y_hat_class
# end function
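A minimal self-contained sketch of the same thresholded classifier, assuming only NumPy (ru.linear_model above is taken to compute w dot x + w0; the function name below is hypothetical):

import numpy as np

def ridge_bin_class_sketch(X, w, w0, thresh=0.5):
    # Linear model predictions, then threshold into {0, 1}
    y_hat = X @ w + w0
    return (y_hat >= thresh).astype(int)

# Usage on random data (n = 5 samples, d = 3 features)
X = np.random.rand(5, 3)
w = np.array([0.2, -0.1, 0.5])
print(ridge_bin_class_sketch(X, w, w0=0.1))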
Example #2
def mnist_ridge_thresh(X, y, lam=1):
    """
    Find the best w dot x threshold for labeling an MNIST digit as a 2.
    For the purposes of this classification, 2 -> 1, while everything
    else -> 0.  Computed simply by taking the median of the predictions
    corresponding to the indices of 1's in the truth vector (only use on
    training data!).

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : vector (n x 1)
        binary labels (1 for a 2, 0 otherwise)
    lam : float (optional)
        regularization constant

    Returns
    -------
    thresh_best : float
        optimal threshold for picking out 2s
    """

    # Fit model
    w0, w = ri.fit_ridge(X, y, lam=lam)

    # Predict
    y_hat = ru.linear_model(X, w, w0)

    # Mask where 2s occur in truth
    mask = (y == 1)

    # Take median of corresponding predicted values
    thresh_best = np.median(y_hat[mask])

    return thresh_best
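The median-of-positives heuristic is easy to reproduce end to end. A minimal sketch, assuming a standard closed-form ridge fit in place of the ri.fit_ridge helper (normal equations, no offset term; the function name is hypothetical):

import numpy as np

def median_thresh_sketch(X, y, lam=1.0):
    # Closed-form ridge fit: w = (X^T X + lam*I)^{-1} X^T y
    w = np.linalg.solve(X.T @ X + lam * np.eye(X.shape[1]), X.T @ y)
    y_hat = X @ w
    # Median prediction over the true positives (y == 1)
    return np.median(y_hat[y == 1])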
Example #3
def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    Find the best regularization constant lambda for labeling an MNIST
    digit as a 2.  For the purposes of this classification, 2 -> 1, while
    everything else -> 0.  Optimize over lam by minimizing the 0-1 loss.

    DEPRECATED -- DO NOT USE

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : vector (n x 1)
        binary labels (1 for a 2, 0 otherwise)
    num : int
        number of lambda gridpoints to search over
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        optimal regularization constant for picking out 2s
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        classification threshold at the optimal lambda
    """

    # Make arrays of lambdas, thresholds, losses
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Loop over lambdas, evaluate the model, see which is best
    for ii in range(len(lams)):
        print("Iteration, lambda:", ii, lams[ii])

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lams[ii])

        # Predict
        y_hat = ru.linear_model(X, w, w0)

        # Compute threshold as median of predicted rows corresponding to twos
        mask = (y == 1)
        thresh[ii] = np.median(y_hat[mask])

        # Classify at this threshold
        y_hat = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_hat))
        print("Predicted threshold:", thresh[ii])

        # Track the 0-1 loss used to optimize lambda
        loss[ii] = ru.loss_01(y, y_hat)
        print("0-1 Loss:", loss[ii])

    # Get best lambda (min 0-1 loss on the training set) and return it
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()

    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Mark the best-fit lambda; axvline spans the full axis height by default
    ax.axvline(x=lams[best_ind], linewidth=3, color='k', ls="--")

    ax.set_xscale("log")

    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
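The grid-search pattern above (log-spaced lambdas, a refit per lambda, argmin of the loss) can be reproduced without the helper modules. A minimal sketch under the same assumptions as before (closed-form ridge, no offset term, hypothetical function name):

import numpy as np

def ridge_lam_grid_sketch(X, y, num=20, minlam=1e-6, maxlam=1e6):
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    loss = np.zeros_like(lams)
    for ii, lam in enumerate(lams):
        # Refit ridge at this lambda
        w = np.linalg.solve(X.T @ X + lam * np.eye(X.shape[1]), X.T @ y)
        y_hat = X @ w
        # Median-of-positives threshold, then 0-1 loss
        thresh = np.median(y_hat[y == 1])
        loss[ii] = np.mean((y_hat >= thresh).astype(int) != y)
    best = np.argmin(loss)
    return lams[best], loss[best]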
Example #4
        # Find minimum threshold, lambda from minimum validation error
        ind_t, ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
        best_lambda = lams[ind_l]
        best_thresh = thresh_arr[ind_t]
        print("Best lambda:", best_lambda)
        print("Best threshold:", best_thresh)

        plt.plot(lams, err_val[ind_t, :])
        plt.show()

    # Fit training set for model parameters using best fit lambda
    w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)

    # Predict, then get square loss, 1/0 error on training data
    y_hat_train = ru.linear_model(X_train, w, w0)
    y_hat_train_class = ri.ridge_bin_class(X_train, w, w0, thresh=best_thresh)
    sl_train = val.square_loss(y_train_true, y_hat_train)
    err_10_train = val.loss_01(y_train_true, y_hat_train_class)

    # Load testing set
    print("Loading MNIST Testing data...")
    X_test, y_test = mu.load_mnist(dataset='testing')
    y_test_true = mnist_two_filter(y_test)
    print("True number of twos in testing set:",np.sum(y_test_true))

    # Predict, then get square loss, 1/0 error on testing data
    y_hat_test = ru.linear_model(X_test, w, w0)
    y_hat_test_class = ri.ridge_bin_class(X_test, w, w0, thresh=best_thresh)
    sl_test = val.square_loss(y_test_true, y_hat_test)
    err_10_test = val.loss_01(y_test_true, y_hat_test_class)
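The snippet leans on two loss helpers, val.square_loss and val.loss_01, that are not shown in this excerpt. The standard definitions they presumably follow are one-liners:

import numpy as np

def square_loss(y, y_hat):
    # Mean squared difference between truth and prediction
    return np.mean((np.asarray(y) - np.asarray(y_hat)) ** 2)

def loss_01(y, y_hat):
    # Fraction of misclassified samples
    return np.mean(np.asarray(y) != np.asarray(y_hat))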
Example #5
plt.figure()
plt.title(site + '_' + runclass + '_' + 'pupil')
bins = int(len(np.mean(p, 0).flatten()) / 2)
out = plt.hist(np.mean(p, 0).flatten(), bins=bins, color='green')

# --- TODO --- come up with a good way to divide into big/small
#count=out[0]
#pup_val=out[1]
#minima=pup_val[ss.argrelextrema(count,np.less,order=5)]
#plt.axvline(minima,color='k')

# ===================== Send data off for analysis ===========================

# call linear model function (regression between r and a variable set of predictors)
from regression_utils import linear_model
pred, rsq = linear_model(r, p)
r_no_p = (r.reshape(bincount * stimcount * repcount, cellcount) -
          pred).reshape(bincount, repcount, stimcount, cellcount)
#r = r_no_p

if reduce_method == 'PCA':
    pcs, var, step, loading = PCA(r, trial_averaged=False, center=True)

## ===========================================================================
'''
# Filtering pupil...
p_long = p.transpose(1,2,0).flatten()
from scipy.signal import butter, lfilter, freqz
def butter_lowpass(cutoff, fs, order=5):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    # standard Butterworth low-pass design
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a
'''
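If that commented-out filtering path were revived, the usual pairing is scipy.signal.lfilter applied with the designed coefficients. A short sketch with placeholder sampling rate and cutoff (neither value comes from the original analysis):

import numpy as np
from scipy.signal import butter, lfilter

def butter_lowpass_filter(data, cutoff, fs, order=5):
    # Design a Butterworth low-pass filter and apply it to the trace
    b, a = butter(order, cutoff / (0.5 * fs), btype='low', analog=False)
    return lfilter(b, a, data)

# Usage on a synthetic pupil-like trace
fs, cutoff = 100.0, 2.0
t = np.arange(0, 10, 1.0 / fs)
trace = np.sin(2 * np.pi * 0.5 * t) + 0.3 * np.random.randn(len(t))
smoothed = butter_lowpass_filter(trace, cutoff, fs)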
Example #6
        # Now save it
        np.savez(w_cache, w_0=w_0, w=w)
    else:
        print("Loading training set fit from cache...")
        res = np.load(w_cache)
        w_0 = res["w_0"]
        w = res["w"]

    r2_train = ru.r_squared(X_train, y_train, w, w_0, sparse=True)
    print("r^2 on the training set: %.3lf" % r2_train)
    r2_val = ru.r_squared(X_val, y_val, w, w_0, sparse=True)
    print("r^2 on the validation set: %.3lf" % r2_val)

    # Compute error on testing set
    y_hat_test = ru.linear_model(X_test, w, w_0, sparse=True)
    RMSE_test = val.RMSE(y_test, y_hat_test)
    r2_test = ru.r_squared(X_test, y_test, w, w_0, sparse=True)
    print("RMSE on the testing set: %.2lf" % RMSE_test)
    print("r^2 on the testing set: %.3lf" % r2_test)

    # Inspect solution and output the top 20 weights by magnitude and their corresponding feature names
    sorted_w_args = np.array(np.fabs(w).flatten()).argsort()[::-1][:20]
    for i in range(20):
        print("Feature: %s, weight: %.2lf" %
              (featureNames[sorted_w_args[i]], w[sorted_w_args[i]]))

###############################
#
# Upvote section
#
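ru.r_squared is also not shown in this excerpt; note that above it takes (X, y, w, w_0) and predicts internally. The conventional coefficient of determination it presumably computes, sketched here on predictions directly:

import numpy as np

def r_squared_sketch(y, y_hat):
    # R^2 = 1 - SS_res / SS_tot
    y, y_hat = np.asarray(y), np.asarray(y_hat)
    ss_res = np.sum((y - y_hat) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 1.0 - ss_res / ss_tot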