コード例 #1
0
 def test_enact_best_split_basic(self):
     """Split a tiny two-response node and check the resulting costs and sizes.

     After one call to ``enact_best_split(min_data_per_node=1)`` the root and
     both of its children must report a cost of exactly 0.0, and each child
     must report ``N == 1``.
     """
     predictors = [[1.0, 2.0], [-2.0, 2.0]]
     responses = [7.0, 4.0]
     root = TreeNode(predictors, responses)

     root.enact_best_split(min_data_per_node=1)

     # Costs are checked root first, then left child, then right child.
     self.assertEqual(0.0, root.get_cost())
     left = root.left_child
     self.assertEqual(0.0, left.get_cost())
     right = root.right_child
     self.assertEqual(0.0, right.get_cost())

     # Each child reports N == 1 after the split.
     self.assertEqual(1, left.N)
     self.assertEqual(1, right.N)
コード例 #2
0
# K-fold cross-validation of a TreeNode model over training_dataframe.
# For each fold a fresh tree is grown on the rows outside the fold and its
# in-sample accuracy is printed and recorded.
K = 4  # number of folds
print_costs_during_training = False  # set True to print the tree cost after every split

training_accuracies = []
validation_accuracies = []

for k in range(K):
    # A row belongs to validation fold k when its index label i satisfies
    # i % K == k; every other row is used for training.
    # DataFrame.select() was deprecated in pandas 0.21 and removed in 1.0,
    # so the folds are selected with boolean masks through .loc instead.
    # NOTE(review): assumes training_dataframe is a pandas DataFrame with
    # integer index labels (the original lambda made the same assumption).
    in_validation_fold = [i % K == k for i in training_dataframe.index]
    training_fold = training_dataframe.loc[
        [not held_out for held_out in in_validation_fold]
    ]
    validation_fold = training_dataframe.loc[in_validation_fold]

    training_predictors = makePredictors(training_fold)
    training_responses = makeResponses(training_fold)

    # Grow the tree: keep splitting until enact_best_split() returns a falsy
    # value, optionally printing the cost before and after each split.
    tn = TreeNode(training_predictors, training_responses)
    if print_costs_during_training:
        print("Cost: {}".format(tn.get_cost()))
    while tn.enact_best_split():
        if print_costs_during_training:
            print("Cost: {}".format(tn.get_cost()))

    print("Fold {}:".format(k))

    # Each sample passed to predict() is built from exactly the predictor
    # sequences at positions 0, 1 and 2.
    # NOTE(review): any further predictors returned by makePredictors would
    # be ignored here - confirm that three is the intended count.
    training_predictions = [tn.predict(x) for x in zip(training_predictors[0], training_predictors[1], training_predictors[2])]
    # A prediction counts as correct when it lies within 0.5 of the response.
    training_errors = [0.0 if abs(x-y) < 0.5 else 1.0 for (x,y) in zip(training_predictions, training_responses)]
    training_accuracy = 1.0 - np.mean(training_errors)

    print("In-sample accuracy: {}".format(training_accuracy))
    training_accuracies.append(training_accuracy)

    validation_predictors = makePredictors(validation_fold)
    validation_responses = makeResponses(validation_fold)