def build_tree() -> DecisionNode:
    """Return a fixed two-level tree used as a fixture by the tree tests.

    The root node holds ``Question(0, 'Red', 'color')``. Its true branch is a
    leaf with two 'Red' rows; its false branch is a nested node holding
    ``Question(0, 'Yellow', 'color')`` with one leaf on each side.
    """
    # NOTE(review): there is no ``self`` parameter although callers use
    # ``self.build_tree()`` -- presumably a @staticmethod whose decorator is
    # outside this view; confirm.
    return DecisionNode(
        Question(0, 'Red', 'color'),
        Leaf([['Red', 1, 'Grape'], ['Red', 1, 'Grape']]),
        DecisionNode(
            Question(0, 'Yellow', 'color'),
            Leaf([['Yellow', 3, 'Apple'], ['Yellow', 3, 'Lemon']]),
            Leaf([['Green', 3, 'Apple']]),
        ),
    )
def test_info_gain_with_better_split(self):
    """Partitioning the training data with Question(0, 'Red') gains ~0.3733333."""
    target_gain = 0.3733333
    red_question = Question(0, 'Red')

    matching, non_matching = DecisionTree._partition(self.training_data,
                                                     red_question)
    # Gini impurity of the unsplit data is the baseline the gain is measured
    # against.
    baseline_gini = DecisionTree._gini(self.training_data)
    gain = DecisionTree.info_gain(matching, non_matching, baseline_gini)

    self.assertAlmostEqual(target_gain, gain)
def test_info_gain(self):
    """Partitioning the training data with Question(0, 'Green') gains ~0.14."""
    target_gain = 0.14
    green_question = Question(0, 'Green')

    partitions = DecisionTree._partition(self.training_data, green_question)
    matching, non_matching = partitions
    # Impurity before the split; info_gain compares the partitions against it.
    impurity_before = DecisionTree._gini(self.training_data)
    gain = DecisionTree.info_gain(matching, non_matching, impurity_before)

    self.assertAlmostEqual(target_gain, gain)
def test_decision_node_constructor(self):
    """A DecisionNode exposes the question and branches it was built with."""
    red_question = Question(0, 'Red')
    matching_rows = [['Red', 1, 'Grape'], ['Red', 1, 'Grape']]
    other_rows = [
        ['Green', 3, 'Apple'],
        ['Yellow', 3, 'Apple'],
        ['Yellow', 3, 'Lemon'],
    ]

    node = DecisionNode(red_question, matching_rows, other_rows)

    # Checked in the same order as the constructor arguments.
    expectations = (
        ('question', red_question),
        ('true_branch', matching_rows),
        ('false_branch', other_rows),
    )
    for attribute, expected in expectations:
        self.assertEqual(expected, getattr(node, attribute))
def test_partition(self):
    """_partition with Question(0, 'Red') separates the 'Red' rows from the rest."""
    red_rows = [['Red', 1, 'Grape'], ['Red', 1, 'Grape']]
    remaining_rows = [
        ['Green', 3, 'Apple'],
        ['Yellow', 3, 'Apple'],
        ['Yellow', 3, 'Lemon'],
    ]
    red_question = Question(0, 'Red')

    matched, unmatched = DecisionTree._partition(self.training_data,
                                                 red_question)

    self.assertEqual(red_rows, matched)
    self.assertEqual(remaining_rows, unmatched)
def test_compare_trees_not_equal(self):
    """compare_trees is falsy for the fixture tree versus a different tree."""
    reference_tree = self.build_tree()
    # Differs from the fixture: the true branch is None and the false branch
    # is a single leaf.
    different_tree = DecisionNode(
        Question(0, 'Red'),
        None,
        Leaf([['Yellow', 3, 'Apple'], ['Yellow', 3, 'Lemon']]),
    )

    self.assertFalse(compare_trees(reference_tree, different_tree))
def test_find_best_split(self):
    """_find_best_split on the training data picks Question(1, 3)."""
    wanted_question = Question(1, 3)

    # Only the question is asserted; the gain returned alongside it is ignored.
    _, chosen_question = self.decision_tree._find_best_split(
        self.training_data)

    self.assertEqual(wanted_question, chosen_question)
def test_repr(self):
    """repr() of a Question with a string value renders an '==' comparison."""
    red_question = Question(0, 'Red')
    rendered = repr(red_question)
    self.assertEqual('Is feature 0 == Red?', rendered)
def test_repr_with_numeric_value(self):
    """repr() of a Question with a numeric value renders a '>=' comparison."""
    numeric_question = Question(1, 2)
    rendered = repr(numeric_question)
    self.assertEqual('Is feature 1 >= 2?', rendered)
def test_does_not_match_with_numeric_value(self):
    """Question(1, 4) must not match the first training row."""
    numeric_question = Question(1, 4)
    first_row = self.training_data[0]
    is_match = numeric_question.match(first_row)
    self.assertFalse(is_match)
def test_match_with_numeric_value(self):
    """Question(1, 3) must match the first training row."""
    numeric_question = Question(1, 3)
    first_row = self.training_data[0]
    is_match = numeric_question.match(first_row)
    self.assertTrue(is_match)
def test_does_not_match(self):
    """Question(0, 'Red') must not match the first training row."""
    red_question = Question(0, 'Red')
    first_row = self.training_data[0]
    is_match = red_question.match(first_row)
    self.assertFalse(is_match)
def test_match(self):
    """Question(0, 'Green') must match the first training row."""
    green_question = Question(0, 'Green')
    first_row = self.training_data[0]
    is_match = green_question.match(first_row)
    self.assertTrue(is_match)
def test_question_constructor(self):
    """Question stores its constructor arguments as column_index and value."""
    green_question = Question(0, 'Green')

    stored_column = green_question.column_index
    self.assertEqual(stored_column, 0)

    stored_value = green_question.value
    self.assertEqual(stored_value, 'Green')