Exemple #1
0
def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    Find the ridge regularization constant lambda that minimizes the 0-1
    loss for labeling a MNIST digit as a 2.  For the purposes of this
    classification, 2 -> 1, while everything else -> 0.

    DEPRECATED -- DO NOT USE

    For every lambda on a log-spaced grid the ridge model is fit on (X, y),
    the classification threshold is set to the median of the linear-model
    predictions over the rows where y == 1, and the 0-1 loss of the resulting
    classifier is evaluated on the same (X, y).  Progress is printed for each
    grid point and a plot of loss versus lambda is saved to "sl_lam.pdf".

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : array
        Labels for the rows of X; entries equal to 1 mark the twos
    num : int
        number of lambda gridpoints to search over (must be >= 1)
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        lambda with the smallest 0-1 loss (first one on the grid if tied)
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        classification threshold that was used at lam_best

    Raises
    ------
    ValueError
        If num is smaller than 1 (there would be no grid to search).
    """

    # Fail early with a clear message; otherwise np.argmin below raises a
    # less helpful ValueError about an empty sequence.
    if num < 1:
        raise ValueError("num must be at least 1 to search over lambda")

    # Make array of lambdas (log-spaced), plus per-lambda thresholds/losses
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Rows labeled as twos; does not depend on lambda, so compute it once
    mask = (y == 1)

    # Loop over lambdas, evaluate model, see which is best
    for ii, lam in enumerate(lams):
        print("Iteration, lambda:", ii, lam)

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lam)

        # Predict (continuous linear-model output)
        y_hat = ru.linear_model(X, w, w0)

        # Compute threshold as median of predicted rows corresponding to twos
        thresh[ii] = np.median(y_hat[mask])

        # Classify with that threshold
        y_hat = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_hat))
        print("Predicted threshold:", thresh[ii])

        # Record the 0-1 loss used to optimize lambda
        loss[ii] = ru.loss_01(y, y_hat)
        print("0-1 Loss:", loss[ii])

    # Index of the best lambda (min 0-1 loss on the data passed in)
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()

    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Mark the best lambda.  Draw on `ax` explicitly, and rely on the default
    # ymin/ymax (axes fractions 0..1) so the line spans the full axes height.
    ax.axvline(x=lams[best_ind], linewidth=3, color='k', ls="--")

    ax.set_xscale("log")

    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
Exemple #2
0
def mnist_ridge_lam(X, y, num=20, minlam=1.0e-10, maxlam=1.0e10):
    """
    This function finds the best regularization constant lambda
    for labeling a MNIST digit as a 2.  For the purposes of this classification,
    2 -> 1, while everything else -> 0.  Optimize over lam by minimizing the
    0-1 loss.

    DEPRECATED -- DO NOT USE

    The search runs over a log-spaced grid of lambdas.  At each grid point
    the ridge model is fit on (X, y), the decision threshold is taken as the
    median linear-model prediction over the rows with y == 1, and the 0-1
    loss is computed on the same (X, y).  Side effects: prints progress for
    every grid point and saves a loss-vs-lambda plot to "sl_lam.pdf".

    Parameters
    ----------
    X : array (n x d)
        Data array (n observations, d features)
    y : array
        Labels for the rows of X; entries equal to 1 mark the twos
    num : int
        number of lambda gridpoints to search over (must be >= 1)
    minlam : float
        minimum lambda grid value
    maxlam : float
        maximum lambda grid value

    Returns
    -------
    lam_best : float
        lambda giving the minimum 0-1 loss (first on the grid if tied)
    loss_best : float
        minimum 0-1 loss
    thresh_best : float
        decision threshold associated with lam_best

    Raises
    ------
    ValueError
        If num is smaller than 1 (the lambda grid would be empty).
    """

    # An empty grid would only surface later as an obscure np.argmin
    # ValueError, so reject it up front with an explicit message.
    if num < 1:
        raise ValueError("num must be at least 1 to search over lambda")

    # Make array of lambdas, and storage for thresholds and losses
    lams = np.logspace(np.log10(minlam), np.log10(maxlam), num)
    thresh = np.zeros_like(lams)
    loss = np.zeros_like(lams)

    # Boolean selector for the twos; independent of lambda, so hoisted
    mask = (y == 1)

    # Loop over lambdas, evaluate model, see which is best
    for ii, lam in enumerate(lams):
        print("Iteration, lambda:", ii, lam)

        # Fit training set for model parameters
        w0, w = ri.fit_ridge(X, y, lam=lam)

        # Predict (raw linear-model output, before thresholding)
        y_hat = ru.linear_model(X, w, w0)

        # Compute threshold as median of predicted rows corresponding to twos
        thresh[ii] = np.median(y_hat[mask])

        # Classify using that threshold
        y_hat = ri.ridge_bin_class(X, w, w0, thresh=thresh[ii])
        print("Predicted Number of 2s:", np.sum(y_hat))
        print("Predicted threshold:", thresh[ii])

        # Store the 0-1 loss that lambda is optimized against
        loss[ii] = ru.loss_01(y, y_hat)
        print("0-1 Loss:", loss[ii])

    # Get index of the best lambda (min 0-1 loss on the supplied data)
    best_ind = np.argmin(loss)

    # Now plot it
    fig, ax = plt.subplots()

    ax.plot(lams, loss, "-o", lw=3)
    ax.set_xlabel(r"$\lambda$")
    ax.set_ylabel(r"0-1 Loss")

    # Plot best fit.  Use the Axes method rather than the pyplot state
    # machine; ymin/ymax are axes fractions whose defaults (0 and 1) already
    # span the whole axes height.
    ax.axvline(x=lams[best_ind],
               linewidth=3,
               color='k',
               ls="--")

    ax.set_xscale("log")

    fig.tight_layout()
    fig.savefig("sl_lam.pdf")

    return lams[best_ind], loss[best_ind], thresh[best_ind]
Exemple #3
0
        # Find minimum threshold, lambda from minimum validation error
        ind_t,ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
        best_lambda = lams[ind_l]
        best_thresh = thresh_arr[ind_t]
        print("Best lambda:",best_lambda)
        print("Best threshold:",best_thresh)

        plt.plot(lams,err_val[ind_t,:])
        plt.show()

    # Fit training set for model parameters using best fit lambda
    w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)

    # Predict, then get square loss, 1/0 error on training data
    y_hat_train = ru.linear_model(X_train, w, w0)
    y_hat_train_class = ri.ridge_bin_class(X_train, w, w0, thresh=best_thresh)
    sl_train = val.square_loss(y_train_true, y_hat_train)
    err_10_train = val.loss_01(y_train_true, y_hat_train_class)

    # Load testing set
    print("Loading MNIST Testing data...")
    X_test, y_test = mu.load_mnist(dataset='testing')
    y_test_true = mnist_two_filter(y_test)
    print("True number of twos in testing set:",np.sum(y_test_true))

    # Predict, then get square loss, 1/0 error on testing data
    y_hat_test = ru.linear_model(X_test, w, w0)
    y_hat_test_class = ri.ridge_bin_class(X_test, w, w0, thresh=best_thresh)
    sl_test = val.square_loss(y_test_true, y_hat_test)
    err_10_test = val.loss_01(y_test_true, y_hat_test_class)
Exemple #4
0
        # Find minimum threshold, lambda from minimum validation error
        ind_t, ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
        best_lambda = lams[ind_l]
        best_thresh = thresh_arr[ind_t]
        print("Best lambda:", best_lambda)
        print("Best threshold:", best_thresh)

        plt.plot(lams, err_val[ind_t, :])
        plt.show()

    # Fit training set for model parameters using best fit lambda
    w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)

    # Predict, then get square loss, 1/0 error on training data
    y_hat_train = ru.linear_model(X_train, w, w0)
    y_hat_train_class = ri.ridge_bin_class(X_train, w, w0, thresh=best_thresh)
    sl_train = val.square_loss(y_train_true, y_hat_train)
    err_10_train = val.loss_01(y_train_true, y_hat_train_class)

    # Load testing set
    print("Loading MNIST Testing data...")
    X_test, y_test = mu.load_mnist(dataset='testing')
    y_test_true = mnist_two_filter(y_test)
    print("True number of twos in testing set:", np.sum(y_test_true))

    # Predict, then get square loss, 1/0 error on testing data
    y_hat_test = ru.linear_model(X_test, w, w0)
    y_hat_test_class = ri.ridge_bin_class(X_test, w, w0, thresh=best_thresh)
    sl_test = val.square_loss(y_test_true, y_hat_test)
    err_10_test = val.loss_01(y_test_true, y_hat_test_class)