Ejemplo n.º 1
0
"""
Creates a box plot with the features on the x-axis and their respective values on the y-axis.
The shape and layout depend on the arguments passed to plt.boxplot().
"""

import matplotlib.pyplot as plt
from HelpMethods import helpers, given

# Both paths are left empty in this example and must be filled in before running.
# DATA_PATH is handed to the CSV loader; SAVE_PATH is the prefix prepended to the
# name of the image file written by the plotting function.
DATA_PATH = ""
SAVE_PATH = ""

# Load the data set. Only the feature matrix (input_data) is used by this script;
# yb and ids are unpacked but not read again here.
yb, input_data, ids = given.load_csv_data(DATA_PATH)

# Clean the raw feature matrix.
# NOTE(review): presumably handles missing/invalid entries - confirm in helpers.clean_matrix.
cleaned = helpers.clean_matrix(input_data)

# Standardize the cleaned features so they can be drawn on a common scale.
# NOTE(review): assumes per-feature standardization - confirm in helpers.standardize.
stand_data = helpers.standardize(cleaned)


def box_plot_features_values(data, save):
    """Draw a box plot of the features in *data* and save it as a PNG file.

    Features are laid out along the x axis ("Feature") and their values
    along the y axis ("Value"). Outlier markers are suppressed.

    Args:
        data: Anything accepted by ``plt.boxplot`` as its data argument;
            presumably a 2-D array with one column per feature
            (TODO confirm against the caller).
        save: String prepended to the fixed file name
            ``"Values-Features plot.png"`` to form the output path.
    """
    fig = plt.figure()
    try:
        # Spelled out as keywords instead of the positional ``0, ''``:
        # no notches, and an empty marker string hides the fliers.
        plt.boxplot(data, notch=False, sym='')

        plt.xlabel("Feature")
        plt.ylabel("Value")
        plt.savefig(save + "Values-Features plot.png", dpi=200)
    finally:
        # Close the figure rather than merely clearing it, so repeated calls
        # (or a failure while plotting/saving) do not leave figures open.
        plt.close(fig)

# Plot the standardized features and write the image using SAVE_PATH as prefix.
box_plot_features_values(stand_data, SAVE_PATH)
Ejemplo n.º 2
0
from HelpMethods import helpers, given
from Implementations import implementations

# Our best submission: least squares on a polynomial feature expansion of degree 6.

# Paths are left empty in this example and must be filled in before running.
TRAIN_DATA_PATH = ""
TEST_DATA_PATH = ""
# Passed to create_csv_submission as its third argument
# (presumably the name of the submission file - confirm in given).
PREDICTION_NAME = "Least Squares with degree 6."
degree = 6

# load training data (the third returned value is not needed for training)
y_train, x_train, _ = given.load_csv_data(TRAIN_DATA_PATH)

# build the polynomial for training
training = helpers.build_poly(x_train, degree)

# calculating weights; the second returned value is kept for inspection only
weights, mse_train = implementations.least_squares(y_train, training)


# load test data with the same loader used for the training data
# (previously called through `helpers`, unlike every other load in these scripts)
y_test, x_test, ids_test = given.load_csv_data(TEST_DATA_PATH)

# build the polynomial for testing, using the same degree as for training
test = helpers.build_poly(x_test, degree)


# predict labels for the test set and write the submission
prediction = given.predict_labels(weights, test)

given.create_csv_submission(ids_test, prediction, PREDICTION_NAME)
"""
Example of how we used split data to search for optimal hyper-parameters using a grid search.
"""

from HelpMethods import helpers, given

# Path is left empty in this example and must be filled in before running.
DATA_PATH = ""
# Ratio handed to helpers.split_data.
# NOTE(review): presumably the fraction of samples assigned to training - confirm.
SPLIT_RATIO = 0.8

# Load the data.
y_train, x_train, ids = given.load_csv_data(DATA_PATH)

# Split the data with the given ratio.
train_x, train_y, test_x, test_y = helpers.split_data(x_train, y_train,
                                                      SPLIT_RATIO)

# Standardize the train and test data.
# NOTE(review): the two splits are standardized by separate calls; if
# helpers.standardize uses the statistics of its own input, the test split is
# not scaled with the training statistics - confirm this is intended.
stand_train = helpers.standardize(train_x)
stand_test = helpers.standardize(test_x)

# Lists of hyper-parameter values to test; empty here, fill in before running.
degrees = []
hyper_params = []

# Decision variables tracking the best combination found so far.
# best_rmse starts at a large sentinel (presumably so the first evaluated
# combination replaces it); -1 marks "nothing selected yet".
best_rmse = 1000000
best_degree = -1
best_hyperparam = -1

# Grid search for the best combination of hyper-parameters. Can be extended with more for loops.
# Replace "REGRESSION_METHOD" with the desired method.