Ejemplo n.º 1
0
 def test_enact_best_split_basic(self):
     """Split a two-sample node once and inspect the resulting costs and child sizes.

     With ``min_data_per_node=1`` the test expects the root and both
     children to report a cost of exactly 0.0, and each child to report
     ``N == 1``.
     """
     root = TreeNode([[1.0, 2.0], [-2.0, 2.0]], [7.0, 4.0])
     root.enact_best_split(min_data_per_node=1)

     # Root first, then left and right child: every cost must be zero.
     for node in (root, root.left_child, root.right_child):
         self.assertEqual(0.0, node.get_cost())

     # Both children must report N == 1.
     for child in (root.left_child, root.right_child):
         self.assertEqual(1, child.N)
Ejemplo n.º 2
0
 def test_predict_checkerboard(self):
     """Check quadrant predictions after three split rounds on a 2x2 checkerboard.

     The training data is a 20x20 grid over [0, 1) x [0, 1) in steps of
     0.05. The response depends only on which quadrant a point lies in:
     0.0 / 0.1 for y < 0.5 and 3.1 / 2.9 for y >= 0.5 (x < 0.5 first).
     """
     grid = np.arange(0, 1.0, 0.05).tolist()

     # Cartesian product of the grid with itself; x varies slowest.
     points = [(x, y) for x in grid for y in grid]
     predictors = [[x for x, _ in points], [y for _, y in points]]

     responses = []
     for x, y in points:
         if y < 0.5:
             responses.append(0.0 if x < 0.5 else 0.1)
         else:
             responses.append(3.1 if x < 0.5 else 2.9)

     tn = TreeNode(predictors, responses)
     for _ in range(3):
         tn.enact_best_split()

     # The all-zero quadrant is compared exactly; the others approximately.
     self.assertEqual(0.0, tn.predict([0.05, 0.05]))
     approximate_cases = (
         (3.1, [0.05, 0.95]),
         (0.1, [0.95, 0.05]),
         (2.9, [0.95, 0.95]),
     )
     for expected, query in approximate_cases:
         self.assertAlmostEqual(expected, tn.predict(query))
Ejemplo n.º 3
0
 def test_checkerboard_split(self):
     """Check the split sequence and per-node predictions on a 2x2 checkerboard.

     The training data is a 20x20 grid over [0, 1) x [0, 1) in steps of
     0.05, with a response that depends only on the quadrant: 0.0 / 0.1
     for y < 0.5 and 3.1 / 2.9 for y >= 0.5 (x < 0.5 first).

     The test expects three successful calls to ``enact_best_split``
     (each returning True) followed by a fourth that returns False.
     """
     x1_vals = np.arange(0, 1.0, 0.05).tolist()
     x2_vals = np.arange(0, 1.0, 0.05).tolist()
     x0predictors = [x for x in x1_vals for y in x2_vals]
     x1predictors = [y for x in x1_vals for y in x2_vals]
     predictors = [x0predictors, x1predictors]
     responses = [(0.0 if x < 0.5 else 0.1) if y < 0.5 else
                  (3.1 if x < 0.5 else 2.9) for x in x1_vals
                  for y in x2_vals]
     tn = TreeNode(predictors, responses)

     self.assertEqual(True, tn.enact_best_split())
     # After the first split the children's predictions are expected to be
     # 0.05 and 3.00, i.e. mixtures of 0.0/0.1 and 3.1/2.9. Those values
     # are computed floats, so compare approximately: exact equality would
     # depend on summation order and rounding.
     self.assertAlmostEqual(0.05, tn.left_child.unsplit_prediction)
     self.assertAlmostEqual(3.00, tn.right_child.unsplit_prediction)

     self.assertEqual(True, tn.enact_best_split())
     self.assertEqual(True, tn.enact_best_split())

     # The all-zero quadrant is compared exactly, as in the original test.
     self.assertEqual(0.0, tn.left_child.left_child.unsplit_prediction)
     self.assertAlmostEqual(0.1,
                            tn.left_child.right_child.unsplit_prediction)
     self.assertAlmostEqual(3.1,
                            tn.right_child.left_child.unsplit_prediction)
     self.assertAlmostEqual(2.9,
                            tn.right_child.right_child.unsplit_prediction)

     # A fourth call is expected to find nothing left to split.
     self.assertEqual(False, tn.enact_best_split())
Ejemplo n.º 4
0
# K-fold cross-validation of a TreeNode model: for each fold k, train on the
# rows whose index is not congruent to k modulo K, then score the model on the
# training rows and on the held-out rows.
# K, training_dataframe, makePredictors and makeResponses are defined outside
# this excerpt.

# Set to True to print the tree's cost before training and after every split.
print_costs_during_training=False

# Per-fold accuracies, appended in fold order.
training_accuracies = []
validation_accuracies = []

for k in range(K):
    # Partition by index modulo K: fold k is held out, the rest is training.
    # NOTE(review): presumably pandas DataFrame.select, which was deprecated
    # and later removed from pandas -- confirm the pandas version in use.
    training_fold   = training_dataframe.select(lambda i: i%K != k)
    validation_fold = training_dataframe.select(lambda i: i%K == k)

    training_predictors = makePredictors(training_fold)
    training_responses = makeResponses(training_fold)


    # Keep splitting until enact_best_split() returns a falsy value.
    tn = TreeNode(training_predictors, training_responses)
    if print_costs_during_training: print("Cost: {}".format(tn.get_cost()))
    while tn.enact_best_split():
        if print_costs_during_training: print("Cost: {}".format(tn.get_cost()))
    
    print("Fold {}:".format(k))

    # Each sample passed to predict() is a 3-tuple built from the first three
    # predictor sequences.
    # NOTE(review): hard-codes exactly three predictors -- confirm this matches
    # what makePredictors returns.
    training_predictions = [tn.predict(x) for x in zip(training_predictors[0], training_predictors[1], training_predictors[2])]
    # A prediction counts as correct when it is within 0.5 of the response.
    training_errors = [0.0 if abs(x-y) < 0.5 else 1.0 for (x,y) in zip(training_predictions, training_responses)]
    training_accuracy = 1.0 - np.mean(training_errors)

    print("In-sample accuracy: {}".format(training_accuracy))
    training_accuracies.append(training_accuracy)

    # Repeat the prediction step on the held-out fold.
    validation_predictors = makePredictors(validation_fold)
    validation_responses = makeResponses(validation_fold)

    validation_predictions = [tn.predict(x) for x in zip(validation_predictors[0], validation_predictors[1], validation_predictors[2])]