def compare_dataset_size(train_datasets, test_dataset, K=10, out_dir="."):
    """
    Generate plot of train/test error against training-set size.

    One PolynomialRegression model is created and re-fit on each training
    dataset in turn. After every fit, its compute_mse loss is taken on that
    training dataset and on the shared test dataset, and the natural log of
    both losses is plotted against the number of training samples.

    Args:
        - train_datasets (list of dict): A list of training datasets from
          the same distribution. Each is read through its "X" and "Y" keys.
        - test_dataset (dict): The test dataset, read through "X" and "Y".
        - K (int): The degree of the polynomial to fit. Note: 1 <= K <= 10
        - out_dir (str): Forwarded unchanged to visualize() as out_dir.
    """
    # A single model instance is reused; every fit() call happens on it.
    regressor = PolynomialRegression(K=K)
    eval_X = test_dataset["X"]
    eval_Y = test_dataset["Y"]

    sizes = []
    log_train_errors = []
    log_test_errors = []
    for train_set in train_datasets:
        fit_X = train_set["X"]
        fit_Y = train_set["Y"]
        sizes.append(len(fit_X))

        regressor.fit(fit_X, fit_Y)
        train_mse = regressor.compute_mse(fit_X, fit_Y)
        test_mse = regressor.compute_mse(eval_X, eval_Y)
        log_train_errors.append(np.log(train_mse))
        log_test_errors.append(np.log(test_mse))

    # Series 0 is the training error, series 1 the test error. Both share
    # the same x values, but each series gets its own list object.
    visualize(
        [sizes, list(sizes)],
        [log_train_errors, log_test_errors],
        ("Train Error", "Test Error"),
        "Comparing Effects of Dataset Size",
        "Dataset size",
        "Error (Log Scale)",
        savefig=True,
        out_dir=out_dir,
    )
def compare_regularization(train_dataset, test_dataset, K, l2_coefs, title_prefix="", out_dir="."):
    """
    Generate plot to compare effects of L2 regularization.

    For every coefficient in l2_coefs a fresh PolynomialRegression(K) is
    fit with fit_with_l2_regularization on the training dataset. Its
    compute_mse loss on the training and test datasets is then taken, and
    the natural log of both losses is plotted against the coefficient.

    Args:
        - train_dataset (dict): Training dataset, read through "X" and "Y".
        - test_dataset (dict): Test dataset, read through "X" and "Y".
        - K (int): Passed to PolynomialRegression (presumably the degree of
          the polynomial to fit).
        - l2_coefs (iterable): The L2 coefficients to sweep over.
        - title_prefix (str): Text placed in front of the plot title.
        - out_dir (str): Forwarded unchanged to visualize() as out_dir.
    """
    fit_X = train_dataset["X"]
    fit_Y = train_dataset["Y"]
    eval_X = test_dataset["X"]
    eval_Y = test_dataset["Y"]

    scaled_coefs = []
    log_train_errors = []
    log_test_errors = []
    for coef in l2_coefs:
        # The x axis is labelled in units of 1e-2, so plot coef * 1e2.
        scaled_coefs.append(coef * 1e2)

        regressor = PolynomialRegression(K)
        regressor.fit_with_l2_regularization(fit_X, fit_Y, coef)
        train_mse = regressor.compute_mse(fit_X, fit_Y)
        test_mse = regressor.compute_mse(eval_X, eval_Y)
        log_train_errors.append(np.log(train_mse))
        log_test_errors.append(np.log(test_mse))

    # Series 0 is the training error, series 1 the test error. Both share
    # the same x values, but each series gets its own list object.
    visualize(
        [scaled_coefs, list(scaled_coefs)],
        [log_train_errors, log_test_errors],
        ("Train Error", "Test Error"),
        f"{title_prefix}Comparing Effects of Regularization",
        "L2 Coefficient (Lambda Term) 1e-2",
        "Error (Log Scale)",
        savefig=True,
        out_dir=out_dir,
    )
def compare_model_complexity(train_dataset, test_dataset, Ks, title_prefix="", out_dir="."):
    """
    Generate plot to compare effects of model complexity.

    For every K in Ks a fresh PolynomialRegression(K) is fit on the
    training dataset. Its compute_mse loss on the training and test
    datasets is then taken, and the natural log of both losses is plotted
    against K.

    Args:
        - train_dataset (dict): Training dataset, read through "X" and "Y".
        - test_dataset (dict): Test dataset, read through "X" and "Y".
        - Ks (iterable of int): The polynomial degrees to compare.
        - title_prefix (str): Text placed in front of the plot title.
        - out_dir (str): Forwarded unchanged to visualize() as out_dir.
    """
    fit_X = train_dataset["X"]
    fit_Y = train_dataset["Y"]
    eval_X = test_dataset["X"]
    eval_Y = test_dataset["Y"]

    degrees = []
    log_train_errors = []
    log_test_errors = []
    for degree in Ks:
        degrees.append(degree)

        regressor = PolynomialRegression(degree)
        regressor.fit(fit_X, fit_Y)
        train_mse = regressor.compute_mse(fit_X, fit_Y)
        test_mse = regressor.compute_mse(eval_X, eval_Y)
        log_train_errors.append(np.log(train_mse))
        log_test_errors.append(np.log(test_mse))

    # Series 0 is the training error, series 1 the test error. Both share
    # the same x values, but each series gets its own list object.
    visualize(
        [degrees, list(degrees)],
        [log_train_errors, log_test_errors],
        ("Train Error", "Test Error"),
        f"{title_prefix}Comparing Effects of Model Complexity",
        "Model Complexity (Degree of Polynomial)",
        "Error (Log Scale)",
        savefig=True,
        out_dir=out_dir,
    )