"""
Create a box plot with the features on the x axis and their respective
values on the y axis.

The shape and layout of the plot are controlled by the arguments passed to
plt.boxplot().
"""
import matplotlib.pyplot as plt

from HelpMethods import helpers, given

# Path of the CSV file to read, and prefix prepended to the saved image name.
DATA_PATH = ""
SAVE_PATH = ""

# Load the data, then pass the input matrix through the project's cleaning
# and standardizing helpers before plotting it.
yb, input_data, ids = given.load_csv_data(DATA_PATH)
cleaned = helpers.clean_matrix(input_data)
stand_data = helpers.standardize(cleaned)


def box_plot_features_values(data, save):
    """Draw a box plot of ``data`` and save it as a PNG image.

    Args:
        data: The values handed to ``plt.boxplot()``.
        save: Prefix prepended to the file name
            ``"Values-Features plot.png"``.
    """
    figure = plt.figure()
    # notch and sym are given by keyword because passing them positionally is
    # deprecated in recent Matplotlib releases. sym='' hides the flier
    # (outlier) markers.
    plt.boxplot(data, notch=False, sym='')
    plt.xlabel("Feature")
    plt.ylabel("Value")
    plt.savefig(save + "Values-Features plot.png", dpi=200)
    # Close the figure (instead of only clearing it) so that repeated calls
    # do not accumulate open figures in memory.
    plt.close(figure)


box_plot_features_values(stand_data, SAVE_PATH)
from HelpMethods import helpers, given
from Implementations import implementations

# Our best submission: least squares using a polynomial expansion of degree 6.

# Paths of the training and test CSV files, and the name handed to
# given.create_csv_submission() for the generated submission.
TRAIN_DATA_PATH = ""
TEST_DATA_PATH = ""
PREDICTION_NAME = "Least Squares with degree 6."
degree = 6

# Load the training data; the third returned value is not needed here.
y_train, x_train, _ = given.load_csv_data(TRAIN_DATA_PATH)

# Build the polynomial expansion of the training features.
training = helpers.build_poly(x_train, degree)

# Compute the weights; least_squares() also returns a second value, kept
# here as mse_train.
weights, mse_train = implementations.least_squares(y_train, training)

# Load the test data with the same loader that is used for the training
# data, so both files are read in exactly the same way.
y_test, x_test, ids_test = given.load_csv_data(TEST_DATA_PATH)

# Build the polynomial expansion of the test features with the same degree.
test = helpers.build_poly(x_test, degree)

# Predict the labels of the test set and write the submission file.
prediction = given.predict_labels(weights, test)
given.create_csv_submission(ids_test, prediction, PREDICTION_NAME)
"""
Example of how a train/test split of the data was used to look for the best
hyper-parameters with a grid search.
"""
from HelpMethods import helpers, given

DATA_PATH = ""
SPLIT_RATIO = 0.8

# Read the data set from DATA_PATH.
y_train, x_train, ids = given.load_csv_data(DATA_PATH)

# Divide it into a train part and a test part according to SPLIT_RATIO.
train_x, train_y, test_x, test_y = helpers.split_data(
    x_train, y_train, SPLIT_RATIO
)

# Standardize the train part first, then the test part; each one is passed
# to helpers.standardize() separately.
stand_train, stand_test = (
    helpers.standardize(part) for part in (train_x, test_x)
)

# Candidate values for every hyper-parameter; fill these in before running.
degrees, hyper_params = [], []

# Best result seen so far: start from a large error and placeholder values
# of -1 for the parameters.
best_rmse = 10 ** 6
best_degree = best_hyperparam = -1

# Grid search for the best combination of hyper-parameters. It can be
# extended with more for loops.
# Replace "REGRESSION_METHOD" with the desired method.