Example #1
    print("Loading MNIST Training data...")
    X_train, y_train = mu.load_mnist(dataset='training')

    y_train_true = mnist_two_filter(y_train)
    print("True number of twos in training set:",np.sum(y_train_true))

    # Perform grid search to find best regularization constant and threshold?
    if find_best_lam:
        print("Finding optimal lambda and threshold via regularization path.")

        thresh_arr = np.linspace(-0.2, 1., num)
        err_val = np.zeros((num, num))
        err_train = np.zeros((num, num))

        # Split training data into subtraining set, validation set
        X_tr, y_tr, X_val, y_val = val.split_data(X_train, y_train, frac=frac, seed=seed)

        # Filter y values to 0, 1 labels
        y_tr_true = mnist_two_filter(y_tr)
        y_val_true = mnist_two_filter(y_val)

        # Loop over thresholds
        for i in range(num):
            # Internally loop over lambdas in regularization path
            err_val[i, :], err_train[i, :], lams = val.linear_reg_path(X_tr, y_tr_true,
                                                                       X_val, y_val_true,
                                                                       ri.fit_ridge,
                                                                       lammax=lammax,
                                                                       scale=scale,
                                                                       num=num,
                                                                       error_func=val.loss_01,
                                                                       thresh=thresh_arr[i],
                                                                       **kwargs)
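The helper mnist_two_filter is called throughout these examples but never defined; a minimal sketch of what it plausibly does, assuming it maps raw MNIST digit labels to a binary indicator for the digit 2 (the behavior is inferred from the call sites, not taken from the original source):

    import numpy as np

    def mnist_two_filter(y):
        # Return 1 where the MNIST label is the digit 2, else 0.
        # Sketch only: the real helper's handling of label shape
        # (e.g. column vectors) may differ.
        return (np.asarray(y) == 2).astype(int)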
Example #2
    print("Loading data...")
    # Load a text file of integers:
    y = np.loadtxt("../Data/hw1-data/star_labels.txt", dtype=int)
    y = y.reshape(len(y), 1)

    # Load a text file of feature names:
    with open("../Data/hw1-data/star_features.txt") as f:
        featureNames = f.read().splitlines()

    # Load a csv of floats as a sparse matrix:
    X = io.mmread("../Data/hw1-data/star_data.mtx").tocsc()

    # Split into training set, testing set
    X_train, y_train, X_test, y_test = val.split_data(X,
                                                      y,
                                                      frac=test_frac,
                                                      seed=seed)

    # Now split training set into training set, validation set
    X_train, y_train, X_val, y_val = val.split_data(X_train,
                                                    y_train,
                                                    frac=val_frac,
                                                    seed=seed)

    print("Train shapes:", X_train.shape, y_train.shape)
    print("Val shapes:", X_val.shape, y_val.shape)
    print("Test shapes:", X_test.shape, y_test.shape)

    # Run analysis if answer cache doesn't exist
    if not os.path.exists(cache):
        print("Cache does not exist, running analysis...")
Example #3
    X_train, y_train = mu.load_mnist(dataset='training')

    y_train_true = mnist_two_filter(y_train)
    print("True number of twos in training set:", np.sum(y_train_true))

    # Perform grid search to find best regularization constant and threshold?
    if find_best_lam:
        print("Finding optimal lambda and threshold via regularization path.")

        thresh_arr = np.linspace(-0.2, 1., num)
        err_val = np.zeros((num, num))
        err_train = np.zeros((num, num))

        # Split training data into subtraining set, validation set
        X_tr, y_tr, X_val, y_val = val.split_data(X_train,
                                                  y_train,
                                                  frac=frac,
                                                  seed=seed)

        # Filter y values to 0, 1 labels
        y_tr_true = mnist_two_filter(y_tr)
        y_val_true = mnist_two_filter(y_val)

        # Loop over thresholds
        for i in range(num):
            # Internally loop over lambdas in regularization path
            err_val[i, :], err_train[i, :], lams = val.linear_reg_path(
                X_tr,
                y_tr_true,
                X_val,
                y_val_true,
                ri.fit_ridge,
                lammax=lammax,
                scale=scale,
                num=num,
                error_func=val.loss_01,
                thresh=thresh_arr[i],
                **kwargs)
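Neither ri.fit_ridge nor val.loss_01 is shown; as a hedged sketch, assuming fit_ridge solves the usual ridge normal equations and loss_01 is a thresholded misclassification rate (both signatures are guesses based on how they are called above):

    import numpy as np

    def fit_ridge(X, y, lam=1.0):
        # Closed-form ridge regression: solve (X^T X + lam*I) w = X^T y.
        # Sketch only; the real routine may handle an intercept or
        # use an iterative solver.
        d = X.shape[1]
        return np.linalg.solve(X.T @ X + lam * np.eye(d), X.T @ y)

    def loss_01(y_true, y_hat, thresh=0.5):
        # 0/1 loss: fraction of predictions that disagree with the
        # true binary labels after thresholding at thresh.
        return np.mean((y_hat > thresh).astype(int) != y_true)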
Example #4
    # Run!

    print("Loading data...")
    # Load a text file of integers:
    y = np.loadtxt("../Data/hw1-data/star_labels.txt", dtype=int)
    y = y.reshape(len(y), 1)

    # Load a text file of feature names:
    with open("../Data/hw1-data/star_features.txt") as f:
        featureNames = f.read().splitlines()

    # Load a csv of floats as a sparse matrix:
    X = io.mmread("../Data/hw1-data/star_data.mtx").tocsc()

    # Split into training set, testing set
    X_train, y_train, X_test, y_test = val.split_data(X, y, frac=test_frac, seed=seed)

    # Now split training set into training set, validation set
    X_train, y_train, X_val, y_val = val.split_data(X_train, y_train, frac=val_frac,
                                                    seed=seed)

    print("Train shapes:",X_train.shape,y_train.shape)
    print("Val shapes:",X_val.shape,y_val.shape)
    print("Test shapes:",X_test.shape,y_test.shape)

    # Run analysis if answer cache doesn't exist
    if not os.path.exists(cache):
        print("Cache does not exist, running analysis...")

        # Set maximum lambda, minimum lambda
        lammax = lu.compute_max_lambda(X_train, y_train)
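lu.compute_max_lambda is also not defined in these snippets; for a lasso objective ||Xw - y||^2 + lam*||w||_1 with an unpenalized offset, the smallest lambda that drives every weight to zero has the standard closed form 2*||X^T (y - mean(y))||_inf, so a sketch under that assumption might look like:

    import numpy as np

    def compute_max_lambda(X, y):
        # Smallest lambda for which the lasso solution is identically
        # zero: 2 * max_k |x_k^T (y - ybar)|.  Sketch only, assuming a
        # lasso objective ||Xw - y||^2 + lam*||w||_1 with an offset.
        resid = y - np.mean(y)
        return 2.0 * np.max(np.abs(X.T @ resid))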