def test_enact_best_split_basic(self):
    """Splitting a two-point node should produce two pure single-point leaves.

    Builds a node from two observations, enacts the best split, and checks
    that the parent and both children end up at zero cost with one data
    point in each child.
    """
    node = TreeNode([[1.0, 2.0], [-2.0, 2.0]], [7.0, 4.0])
    node.enact_best_split(min_data_per_node=1)
    # After the split, parent and both leaves should all be at zero cost.
    for part in (node, node.left_child, node.right_child):
        self.assertEqual(0.0, part.get_cost())
    # The two training points should be divided one per child.
    self.assertEqual(1, node.left_child.N)
    self.assertEqual(1, node.right_child.N)
# --- K-fold cross-validation of the decision tree ---
K = 4  # number of folds
print_costs_during_training = False
training_accuracies = []
validation_accuracies = []
for k in range(K):
    # Fold k is held out for validation; the remaining K-1 folds train the tree.
    # (The lambdas are called immediately by select(), so capturing k is safe.)
    training_fold = training_dataframe.select(lambda i: i % K != k)
    validation_fold = training_dataframe.select(lambda i: i % K == k)

    training_predictors = makePredictors(training_fold)
    training_responses = makeResponses(training_fold)

    # Grow the tree greedily until no further cost-reducing split exists.
    tn = TreeNode(training_predictors, training_responses)
    if print_costs_during_training:
        print("Cost: {}".format(tn.get_cost()))
    while tn.enact_best_split():
        if print_costs_during_training:
            print("Cost: {}".format(tn.get_cost()))

    print("Fold {}:".format(k))
    # zip(*training_predictors) transposes the predictor columns into per-row
    # tuples; this generalizes the original hard-coded three columns
    # (training_predictors[0..2]) to any number of predictors.
    training_predictions = [tn.predict(x) for x in zip(*training_predictors)]
    # 0/1 loss: a prediction within 0.5 of the response counts as correct.
    training_errors = [0.0 if abs(x - y) < 0.5 else 1.0
                       for (x, y) in zip(training_predictions, training_responses)]
    training_accuracy = 1.0 - np.mean(training_errors)
    print("In-sample accuracy: {}".format(training_accuracy))
    training_accuracies.append(training_accuracy)

    validation_predictors = makePredictors(validation_fold)
    validation_responses = makeResponses(validation_fold)