def _build_tree_recursively(dataset):
    """
    Grow a decision (sub)tree for the given data, recursing once per value
    of the feature chosen for the split.

    Args:
        dataset: model.DataSet
            The samples reaching the current level of the tree. Each
            recursive call receives a subset filtered on one feature value,
            with that feature's column dropped.

    Returns:
        current_root: Node
            Root of the (sub)tree built from `dataset`. The outermost call
            yields the root of the whole tree; nested calls yield the child
            nodes attached beneath it.
    """
    distinct_labels = set(dataset.get_labels())

    if len(distinct_labels) == 1:
        # Every remaining sample agrees on the label: emit a leaf.
        return Node(distinct_labels.pop())

    if len(dataset.feature_list()) == 0:
        # Nothing left to split on: fall back to the most common label.
        majority_label = get_most_common(dataset.get_labels())
        return Node(majority_label)

    # Splitting is still possible: branch on the chosen feature.
    best_feature = choose_feature_to_split(dataset)
    current_root = Node(best_feature)

    for branch_value in dataset.get_feature_values(best_feature):
        matching_rows = dataset.value_filter(best_feature, branch_value)
        remaining = matching_rows.drop_column(best_feature)
        subtree = _build_tree_recursively(remaining)
        current_root.add_child(branch_value, subtree)

    return current_root
def test_get_all_descendants(self):
    """
    get_all_descendants() on a root with two children, one of which has two
    children of its own, yields all four nodes in any order.
    """
    root = Node("Root")
    first_child = Node("Child1")
    second_child = Node("Child2")
    for branch, child in (("child1", first_child), ("child2", second_child)):
        root.add_child(branch, child)

    # Only the second child gets a further level beneath it.
    first_grandchild = Node("GC1")
    second_grandchild = Node("GC2")
    for branch, grandchild in (("child1", first_grandchild),
                               ("child2", second_grandchild)):
        second_child.add_child(branch, grandchild)

    expected = contains_inanyorder(
        first_child, second_child, first_grandchild, second_grandchild)
    assert_that(root.get_all_descendants(), expected)
def _build_tree_recursively(dataset):
    """
    Build the decision tree below the current level by recursion.

    Args:
        dataset: model.DataSet
            Data visible at this level of the tree. Deeper levels are handed
            subsets filtered on a single feature value, with the column for
            that feature removed.

    Returns:
        current_root: Node
            The node heading the level being processed: the tree's root for
            the outermost call, a child node for every nested call.
    """
    remaining_labels = set(dataset.get_labels())

    # Leaf case 1: the samples are unanimous, so no further split is needed.
    if len(remaining_labels) == 1:
        return Node(remaining_labels.pop())

    # Leaf case 2: no features remain, so use the most common label.
    if len(dataset.feature_list()) == 0:
        fallback_label = get_most_common(dataset.get_labels())
        return Node(fallback_label)

    # Internal node: one child per value of the chosen feature.
    feature = choose_feature_to_split(dataset)
    level_root = Node(feature)
    for feature_value in dataset.get_feature_values(feature):
        filtered = dataset.value_filter(feature, feature_value)
        child_data = filtered.drop_column(feature)
        child_node = _build_tree_recursively(child_data)
        level_root.add_child(feature_value, child_node)
    return level_root
def create_tree_tennis(self):
    """
    Hand-builds the decision tree corresponding to play_tennis.data.

    Layout:
        Outlook
            Sunny    -> Humidity (High -> No, Normal -> Yes)
            Overcast -> Yes
            Rain     -> Wind (Strong -> No, Weak -> Yes)

    Returns:
        tree: Tree
            Tree wrapping the "Outlook" root node.
    """
    outlook = Node("Outlook")

    # Sunny branch: decided by humidity.
    humidity = Node("Humidity")
    humidity_high = Node("No")
    humidity_normal = Node("Yes")
    for branch, leaf in (("High", humidity_high),
                         ("Normal", humidity_normal)):
        humidity.add_child(branch, leaf)
    outlook.add_child("Sunny", humidity)

    # Overcast branch: always "Yes".
    overcast = Node("Yes")
    outlook.add_child("Overcast", overcast)

    # Rain branch: decided by wind.
    wind = Node("Wind")
    wind_strong = Node("No")
    wind_weak = Node("Yes")
    for branch, leaf in (("Strong", wind_strong), ("Weak", wind_weak)):
        wind.add_child(branch, leaf)
    outlook.add_child("Rain", wind)

    return Tree(outlook)
def test_get_all_descendants_empty(self):
    """A node with no children reports an empty list of descendants."""
    childless = Node("Root")
    descendants = childless.get_all_descendants()
    self.assertListEqual(descendants, [])
def test_get_branches_no_children(self):
    """A node with no children reports an empty list of branches."""
    childless = Node("test")
    branches = childless.get_branches()
    self.assertListEqual(branches, [])
def create_tree(self):
    """
    Hand-builds the play_tennis.data decision tree and collects its leaves.

    Layout:
        Outlook
            Sunny    -> Humidity (High -> No, Normal -> Yes)
            Overcast -> Yes
            Rain     -> Wind (Strong -> No, Weak -> Yes)

    Returns:
        tree: Tree
            Tree wrapping the "Outlook" root node.
        leaf_nodes: list(Node)
            The five leaf nodes, ordered: High humidity, Normal humidity,
            Overcast, Strong wind, Weak wind.
    """
    outlook = Node("Outlook")

    # Sunny branch: decided by humidity.
    humidity = Node("Humidity")
    humidity_high = Node("No")
    humidity_normal = Node("Yes")
    for branch, leaf in (("High", humidity_high),
                         ("Normal", humidity_normal)):
        humidity.add_child(branch, leaf)
    outlook.add_child("Sunny", humidity)

    # Overcast branch: a leaf directly under the root.
    overcast = Node("Yes")
    outlook.add_child("Overcast", overcast)

    # Rain branch: decided by wind.
    wind = Node("Wind")
    wind_strong = Node("No")
    wind_weak = Node("Yes")
    for branch, leaf in (("Strong", wind_strong), ("Weak", wind_weak)):
        wind.add_child(branch, leaf)
    outlook.add_child("Rain", wind)

    leaf_nodes = [
        humidity_high,
        humidity_normal,
        overcast,
        wind_strong,
        wind_weak,
    ]
    return Tree(outlook), leaf_nodes
def test_get_all_descendants(self):
    """
    Checks that get_all_descendants() returns children and grandchildren
    alike, without regard to ordering.
    """
    top = Node("Root")
    child_a = Node("Child1")
    child_b = Node("Child2")
    top.add_child("child1", child_a)
    top.add_child("child2", child_b)

    # Hang two grandchildren beneath the second child only.
    grandchild_a = Node("GC1")
    grandchild_b = Node("GC2")
    child_b.add_child("child1", grandchild_a)
    child_b.add_child("child2", grandchild_b)

    every_descendant = [child_a, child_b, grandchild_a, grandchild_b]
    assert_that(
        top.get_all_descendants(),
        contains_inanyorder(*every_descendant))
def create_tree(self):
    """
    Constructs the play_tennis.data decision tree node by node.

    The root "Outlook" node has three branches: "Sunny" leads to a
    "Humidity" node ("High" -> "No", "Normal" -> "Yes"), "Overcast" leads
    straight to a "Yes" leaf, and "Rain" leads to a "Wind" node
    ("Strong" -> "No", "Weak" -> "Yes").

    Returns:
        tree: Tree
            Tree wrapping the "Outlook" root node.
        leaf_nodes: list(Node)
            The leaves in the order: High humidity, Normal humidity,
            Overcast, Strong wind, Weak wind.
    """
    root = Node("Outlook")

    # "Sunny" subtree.
    humidity_split = Node("Humidity")
    leaf_high = Node("No")
    leaf_normal = Node("Yes")
    humidity_split.add_child("High", leaf_high)
    humidity_split.add_child("Normal", leaf_normal)
    root.add_child("Sunny", humidity_split)

    # "Overcast" leaf.
    leaf_overcast = Node("Yes")
    root.add_child("Overcast", leaf_overcast)

    # "Rain" subtree.
    wind_split = Node("Wind")
    leaf_strong = Node("No")
    leaf_weak = Node("Yes")
    wind_split.add_child("Strong", leaf_strong)
    wind_split.add_child("Weak", leaf_weak)
    root.add_child("Rain", wind_split)

    tree = Tree(root)
    leaf_nodes = [leaf_high, leaf_normal, leaf_overcast, leaf_strong, leaf_weak]
    return tree, leaf_nodes