def load_cv_data(validation_data_folder, percent):
    """
    Load the five cross-validation folds for one data percentage.

    Args:
        validation_data_folder (str): Directory containing the
            sub-directories Fold1, Fold2, ..., Fold5. Each sub-directory
            must contain X_<percent>%.txt and y_<percent>%.txt.
        percent: Value substituted into the file names (e.g. 10 selects
            X_10%.txt and y_10%.txt).

    Returns:
        all_folds (list)
            A list of 5 tuples (X, y), ordered from Fold1 to Fold5, holding
            whatever utils.load_data_from_txt_file returns for each file.
    """

    def read_fold(fold):
        # X is read before y, and the y file is loaded with the extra
        # positional flag set to True (its meaning is defined in utils).
        features = utils.load_data_from_txt_file(
            f"{validation_data_folder}/Fold{fold}/X_{percent}%.txt"
        )
        targets = utils.load_data_from_txt_file(
            f"{validation_data_folder}/Fold{fold}/y_{percent}%.txt", True
        )
        return (features, targets)

    return [read_fold(fold) for fold in range(1, 6)]
def load_learning_curve_data(learning_curve_data_folder):
    """
    Load the learning curve training subsets (features only)

    Args:
        learning_curve_data_folder (str): Directory to the folder containing
            the data. This folder must contain the following files
            X_train_10%.txt, X_train_20%.txt,..., X_train_100%.txt

    Returns:
        all_X_train (numpy.ndarray)
            The 10 subsets in increasing order of percent. Only the
            X_train files are read; no y_train file is loaded here.
            When the subsets have different shapes (the usual case, since
            the number of rows grows with the percentage) the result is a
            1-D object array of length 10 whose i-th element is the i-th
            subset. When every subset has the same shape the subsets are
            stacked along a new leading axis.

    Example usage
    >>> subsets_X = load_learning_curve_data("/path/to/folder/with/data")
    >>> for X in subsets_X:
    >>>     # Train on X
    """
    subsets = []
    for percent in range(10, 101, 10):  # For percent from 10, 20, ..., 100
        X_file = f"{learning_curve_data_folder}/X_train_{percent}%.txt"
        subsets.append(np.asarray(utils.load_data_from_txt_file(X_file)))

    if len({subset.shape for subset in subsets}) == 1:
        # Uniform shapes: a plain conversion is well defined.
        return np.asarray(subsets)

    # Ragged shapes: recent NumPy versions refuse to infer an object array
    # from sequences of different shapes (ValueError), so build it
    # explicitly, one subset per slot.
    all_X_train = np.empty(len(subsets), dtype=object)
    for index, subset in enumerate(subsets):
        all_X_train[index] = subset
    return all_X_train
def load_data_custom(learning_curve_data_folder):
    """
    Load only the smallest and the full learning curve training subsets.

    Args:
        learning_curve_data_folder (str): Directory to the folder containing
            the data. This folder must contain the following files
            X_train_10%.txt, X_train_100%.txt
            y_train_10%.txt, y_train_100%.txt

    Returns:
        (all_X_train, all_y_train)
            Two lists of 2 elements each, holding the data loaded for the
            10% subset followed by the 100% subset.
    """
    # Only the 10% and 100% subsets are read (step of 90 between them).
    loaded = [
        (
            utils.load_data_from_txt_file(
                f"{learning_curve_data_folder}/X_train_{percent}%.txt"
            ),
            utils.load_data_from_txt_file(
                f"{learning_curve_data_folder}/y_train_{percent}%.txt", True
            ),
        )
        for percent in (10, 100)
    ]
    features = [X for X, _ in loaded]
    targets = [y for _, y in loaded]
    return (features, targets)
def generate_bmeans():
    """
    Load the five Part_b mean-initialization files.

    Reads P3/MeanInitialization/Part_b/mu_k_1.txt through mu_k_5.txt
    (paths are relative to the current working directory).

    Returns:
        numpy.ndarray: The five loaded arrays, in file order, combined into
        a single array with np.asarray.
    """
    means = [
        np.asarray(
            utils.load_data_from_txt_file(
                f"P3/MeanInitialization/Part_b/mu_k_{num}.txt"
            )
        )
        for num in range(1, 6)  # Files are numbered 1 through 5
    ]
    return np.asarray(means)
def load_all_cross_validation_data(validation_data_folder):
    """
    Load the train/test split of every cross validation fold

    Args:
        validation_data_folder (str): Directory to the folder containing the
            data. This directory must contain the following files
            X_train_fold1.txt, X_train_fold2.txt,..., X_train_fold5.txt
            X_test_fold1.txt, X_test_fold2.txt,..., X_test_fold5.txt

    Returns:
        all_folds (list)
            all_folds is a list of 5 elements, ordered from fold 1 to fold 5.
            Each element is a tuple (X_train, X_test) holding the data loaded
            from X_train_fold<k>.txt and X_test_fold<k>.txt respectively.
            Note that the second element is the fold's test features, not a
            label vector.

    Example usage:
    >>> all_folds = load_all_cross_validation_data("/path/to/folder/with/CV-data")
    >>> X_train, X_test = all_folds[2]  # Data of fold number 3
    """

    def read_fold(fold):
        # Train file is read before the test file for each fold.
        train_part = utils.load_data_from_txt_file(
            f"{validation_data_folder}/X_train_fold{fold}.txt"
        )
        test_part = utils.load_data_from_txt_file(
            f"{validation_data_folder}/X_test_fold{fold}.txt"
        )
        return (train_part, test_part)

    return [read_fold(fold) for fold in range(1, 6)]
# # Gaussian Mixture Models import numpy as np import matplotlib.pyplot as plt import utils from scipy.stats import multivariate_normal from gmm import GaussianMixtureModel X_test_all = utils.load_data_from_txt_file("P3/X_test.txt") test_data = utils.load_data_from_txt_file("P3/X_test.txt") def plot_contour_gaussian(ax, mean, covariance, eps=1e-2): """ Plot the contour of a 2d Gaussian distribution with given mean and covariance matrix Args: ax (matplotlib.axes.Axes): Subplot used to plot the contour mean (numpy.array): Mean of the gaussian distribution covariance (numpy.array): Covariance matrix of the distribution eps: The cut off to draw the contour plot. The higher the value, the smaller the contour plot. Returns: None """