# Build the full feature matrix and the target vector once; every experiment
# below only takes a column prefix of x_set.
x_set = preprocess.matrixify(data, 160)
y_set = preprocess.get_y(data)

# Result accumulators: one entry is appended per iteration of the sweep.
error_list_closed, error_list_gradient = [], []
time_list_closed, time_list_gradient = [], []

# Sweep over progressively wider column prefixes of x_set.
feature_range = np.arange(160)

for extra in feature_range:
    # The slice keeps the first 3 + extra columns, while the message below
    # reports only `extra`.
    # NOTE(review): presumably the first 3 columns are base (non-word)
    # features -- confirm against preprocess.matrixify.
    x_subset = x_set[:, :extra + 3]
    print(f"using {extra} features")

    # Closed-form evaluation runs first, then the gradient-based one, both on
    # the same column subset. Each returns a (time, performance) pair.
    closed_time, closed_error = Evaluator.evaluate_closed_form(x_subset, y_set)
    gradient_time, gradient_error = Evaluator.gradient_evaluator(x_subset, y_set)

    error_list_closed.append(closed_error)
    error_list_gradient.append(gradient_error)

    time_list_closed.append(closed_time)
    time_list_gradient.append(gradient_time)

# Plotting our results
# NOTE(review): `size` is not used in the lines visible here; presumably it is
# the marker size for the scatter plots that follow -- confirm.
size = 5
# Assuming plt is matplotlib.pyplot: 211 selects the first panel of a
# 2-row, 1-column subplot grid.
plt.subplot(211)
plt.title('MSE vs number of features')
plt.scatter(feature_range,
            error_list_gradient,
Beispiel #2
0
from Evaluator import Evaluator
from preprocess import Preprocess
import json
import feature_selector

# Load the raw dataset. JSON text is UTF-8 by specification, so the encoding
# is pinned explicitly instead of relying on the platform default (which is
# not UTF-8 on every system and would fail on non-ASCII content).
# NOTE: the path is relative to the current working directory.
with open("../src/proj1_data.json", encoding="utf-8") as fp:
    data = json.load(fp)


preprocess1 = Preprocess()

# Called on the class, not on preprocess1, and the return value is unused.
# NOTE(review): presumably this prepares `data` in place -- confirm.
Preprocess.preprocess(data)

num_words = 60

# The return value is discarded; the resulting features are read back from
# preprocess1.feature_set further down.
preprocess1.matrixify(data, num_words)
y_set = Preprocess.get_y(data)

# Two extra feature columns computed from the same data.
children_length_inter = preprocess1.children_length_interaction(data)
log_children_list = preprocess1.log_children(data)


preprocess1.add_features(children_length_inter)
preprocess1.add_features(log_children_list)

x_set = preprocess1.feature_set
# NOTE(review): 0.15 is presumably the significance threshold used by the
# backward-elimination step -- confirm against feature_selector.
x_optimal = feature_selector.backwardElimination(x_set, y_set, 0.15)
# Named `elapsed` rather than `time` so the stdlib module name is not shadowed.
elapsed, mse = Evaluator.evaluate_closed_form(x_optimal, y_set)
print(mse)
print(elapsed)