print("Loading MNIST Training data...") X_train, y_train = mu.load_mnist(dataset='training') y_train_true = mnist_two_filter(y_train) print("True number of twos in training set:",np.sum(y_train_true)) # Perform grid search to find best regularization constant and threshold? if find_best_lam: print("Finding optimal lambda and threshold via regularization path.") thresh_arr = np.linspace(-0.2,1.,num) err_val = np.zeros((num,num)) err_train = np.zeros((num,num)) # Split training data into subtraining set, validation set X_tr, y_tr, X_val, y_val = val.split_data(X_train, y_train, frac=frac, seed=seed) # Filter y values to 0, 1 labels y_tr_true = mnist_two_filter(y_tr) y_val_true = mnist_two_filter(y_val) # Loop over thresholds for i in range(num): # Internally loop over lambdas in regularization path err_val[i,:], err_train[i,:], lams = val.linear_reg_path(X_tr, y_tr_true, X_val, y_val_true, ri.fit_ridge, lammax=lammax, scale=scale, num=num, error_func=val.loss_01, thresh=thresh_arr[i], **kwargs)
print("Loading data...") # Load a text file of integers: y = np.loadtxt("../Data/hw1-data/star_labels.txt", dtype=np.int) y = y.reshape(len(y), 1) # Load a text file of feature names: featureNames = open( "../Data/hw1-data/star_features.txt").read().splitlines() # Load a csv of floats as a sparse matrix: X = io.mmread("../Data/hw1-data/star_data.mtx").tocsc() # Split into training set, testing set X_train, y_train, X_test, y_test = val.split_data(X, y, frac=test_frac, seed=seed) # Now split training set into training set, validation set X_train, y_train, X_val, y_val = val.split_data(X_train, y_train, frac=val_frac, seed=seed) print("Train shapes:", X_train.shape, y_train.shape) print("Val shapes:", X_val.shape, y_val.shape) print("Test shapes:", X_test.shape, y_test.shape) # Run analysis if answer cache doesn't exist if not os.path.exists(cache): print("Cache does not exist, running analysis...")
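# Both scripts call val.split_data to carve off a held-out set. A minimal
# sketch of that behavior, assuming frac is the held-out fraction (the name
# split_data_sketch is hypothetical; the real helper may differ):
def split_data_sketch(X, y, frac=0.1, seed=None):
    """Randomly hold out ~frac of the rows as a second (validation) set."""
    rng = np.random.RandomState(seed)
    idx = rng.permutation(X.shape[0])
    n_hold = int(frac * X.shape[0])
    hold, keep = idx[:n_hold], idx[n_hold:]
    # Integer fancy indexing works for both dense arrays and scipy sparse
    # matrices, so this handles the sparse star data as well.
    return X[keep], y[keep], X[hold], y[hold]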
# Run! print("Loading data...") # Load a text file of integers: y = np.loadtxt("../Data/hw1-data/star_labels.txt", dtype=np.int) y = y.reshape(len(y),1) # Load a text file of feature names: featureNames = open("../Data/hw1-data/star_features.txt").read().splitlines() # Load a csv of floats as a sparse matrix: X = io.mmread("../Data/hw1-data/star_data.mtx").tocsc() # Split into training set, testing set X_train, y_train, X_test, y_test = val.split_data(X, y, frac=test_frac, seed=seed) # Now split training set into training set, validation set X_train, y_train, X_val, y_val = val.split_data(X_train, y_train, frac=val_frac, seed=seed) print("Train shapes:",X_train.shape,y_train.shape) print("Val shapes:",X_val.shape,y_val.shape) print("Test shapes:",X_test.shape,y_test.shape) # Run analysis if answer cache doesn't exist if not os.path.exists(cache): print("Cache does not exist, running analysis...") # Set maximum lambda, minimum lambda lammax = lu.compute_max_lambda(X_train,y_train)