Esempio n. 1
0
    def closed_form_extra_features(self):
        """Run the model on the base features plus two engineered columns.

        The base feature matrix is built by ``Preprocess.matrixify`` from
        ``self.data`` (second argument 60 -- presumably the number of word
        features; confirm against ``Preprocess.matrixify``). Two extra
        columns are then appended through ``add_features``:

        * text length multiplied by the ``children`` count, and
        * ``log(children)``, with 0 substituted where ``children`` is 0
          (``math.log(0)`` would raise).

        The combined matrix is passed through
        ``feature_selector.backwardElimination`` with threshold 0.1
        (presumably a significance level -- confirm against that helper)
        before being handed to ``self.run_model``.

        Returns:
            Whatever ``self.run_model(x_set, y_set)`` returns.
        """
        preprocess1 = Preprocess()

        # The return value is not needed here: the matrix actually used is
        # the one returned by the final add_features call below. matrixify is
        # still invoked because add_features presumably extends state it
        # stores on the instance -- confirm against Preprocess.
        preprocess1.matrixify(self.data, 60)
        y_set = Preprocess.get_y(self.data)

        lengths = [len(datapoint['text']) for datapoint in self.data]
        children_list = [datapoint['children'] for datapoint in self.data]

        # Guard the zero case explicitly; log is undefined at 0.
        log_children_list = [
            math.log(children) if children != 0 else 0
            for children in children_list
        ]
        children_length_inter = [
            length * children
            for length, children in zip(lengths, children_list)
        ]

        # Order matters: the interaction column is appended first, then the
        # log column; the second call returns the full feature matrix.
        preprocess1.add_features(children_length_inter)
        x_set = preprocess1.add_features(log_children_list)
        x_set = feature_selector.backwardElimination(x_set, y_set, 0.1)
        return self.run_model(x_set, y_set)
 def test_add_features(self):
     """Check the feature-matrix shape after appending two constant columns.

     After ``preprocess`` and ``matrixify`` on ``self.data``, two columns
     (all 5s, then all 3s) are added via ``add_features``; the matrix
     returned by the second call must have shape ``(self.test_size, 165)``.
     """
     prep = Preprocess()
     prep.preprocess(self.data)
     prep.matrixify(self.data)

     # One constant entry per datapoint for each synthetic column.
     fives = [5 for _ in self.data]
     threes = [3 for _ in self.data]

     prep.add_features(fives)
     result = prep.add_features(threes)
     self.assertEqual(result.shape, (self.test_size, 165))
Esempio n. 3
0
from Evaluator import Evaluator
from preprocess import Preprocess
import json
import feature_selector

# Load the raw dataset. JSON text is UTF-8 by specification, so decode it
# explicitly rather than relying on the platform's default encoding.
with open("../src/proj1_data.json", encoding="utf-8") as fp:
    data = json.load(fp)


preprocess1 = Preprocess()

# Return value is ignored, so this presumably prepares `data` in place --
# confirm against Preprocess.preprocess.
Preprocess.preprocess(data)

num_words = 60

# Return value intentionally unused: the feature matrix is read back from
# preprocess1.feature_set further down.
preprocess1.matrixify(data, num_words)
y_set = Preprocess.get_y(data)

# Two engineered feature columns computed by the preprocessor helpers.
children_length_inter = preprocess1.children_length_interaction(data)
log_children_list = preprocess1.log_children(data)


preprocess1.add_features(children_length_inter)
preprocess1.add_features(log_children_list)

# Select a feature subset with threshold 0.15 (presumably a significance
# level -- confirm against feature_selector), then evaluate the closed-form
# solution and report its error and run time.
x_set = preprocess1.feature_set
x_optimal = feature_selector.backwardElimination(x_set,y_set,0.15)
time, mse = Evaluator.evaluate_closed_form(x_optimal, y_set)
print(mse)
print(time)