Example #1
0
def _build_tree_recursively(dataset):
    """
    Recursively grow the decision (sub)tree for the given data.

    Args:
      dataset: model.DataSet
        The samples reaching the current level of the tree.  Each recursive
        call receives the rows matching one value of the split feature, with
        that feature's column dropped.

    Returns:
      current_root: Node
        Root of the subtree built from ``dataset``.  The outermost call
        returns the root of the whole tree; nested calls return the child
        nodes.
    """
    distinct_labels = set(dataset.get_labels())
    if len(distinct_labels) == 1:
        # Every remaining sample agrees on the label: this is a leaf.
        (only_label,) = distinct_labels
        return Node(only_label)

    remaining_features = dataset.feature_list()
    if len(remaining_features) == 0:
        # Nothing left to split on: use the result of get_most_common.
        fallback_label = get_most_common(dataset.get_labels())
        return Node(fallback_label)

    # Splitting is still possible: branch on the chosen feature.
    best_feature = choose_feature_to_split(dataset)
    subtree_root = Node(best_feature)

    for feature_value in dataset.get_feature_values(best_feature):
        matching_rows = dataset.value_filter(best_feature, feature_value)
        child_data = matching_rows.drop_column(best_feature)
        child_node = _build_tree_recursively(child_data)
        subtree_root.add_child(feature_value, child_node)

    return subtree_root
Example #2
0
 def test_get_all_descendants(self):
     """
     Checks that get_all_descendants reports both the children and the
     grandchildren of a node, in any order.
     """
     root = Node("Root")
     first_child = Node("Child1")
     second_child = Node("Child2")
     for branch, kid in (("child1", first_child), ("child2", second_child)):
         root.add_child(branch, kid)

     # Grandchildren are attached to the second child only.
     first_grandchild = Node("GC1")
     second_grandchild = Node("GC2")
     for branch, grandkid in (("child1", first_grandchild),
                              ("child2", second_grandchild)):
         second_child.add_child(branch, grandkid)

     descendants = root.get_all_descendants()
     expected = contains_inanyorder(
         first_child, second_child, first_grandchild, second_grandchild)
     assert_that(descendants, expected)
Example #3
0
def _build_tree_recursively(dataset):
    """
    Build one level of the decision tree and recurse for the levels below.

    Args:
      dataset: model.DataSet
        Data available at this level.  Deeper levels are called with the
        subset filtered on one value of the split feature, minus that
        feature's column.

    Returns:
      current_root: Node
        The node at the top of the level just processed: the tree's root
        for the outermost call, a child node for every nested call.
    """
    labels_seen = set(dataset.get_labels())

    # Guard 1: a single distinct label means no further split is needed.
    if len(labels_seen) == 1:
        return Node(next(iter(labels_seen)))

    # Guard 2: no features remain to split on.
    if len(dataset.feature_list()) == 0:
        return Node(get_most_common(dataset.get_labels()))

    # General case: one child per value of the chosen feature.
    feature = choose_feature_to_split(dataset)
    branch_node = Node(feature)
    for branch_value in dataset.get_feature_values(feature):
        branch_rows = dataset.value_filter(feature, branch_value)
        branch_node.add_child(
            branch_value,
            _build_tree_recursively(branch_rows.drop_column(feature)))
    return branch_node
Example #4
0
    def create_tree_tennis(self):
        """
        Hand-builds the decision tree expected for the play_tennis.data data.

        Returns:
          tree: Tree
            Tree whose root is the "Outlook" node.
        """
        outlook = Node("Outlook")

        # "Sunny" branch: decided by Humidity.
        humidity = Node("Humidity")
        humid_high = Node("No")
        humid_normal = Node("Yes")
        for branch, leaf in (("High", humid_high), ("Normal", humid_normal)):
            humidity.add_child(branch, leaf)
        outlook.add_child("Sunny", humidity)

        # "Overcast" branch: a single leaf.
        overcast = Node("Yes")
        outlook.add_child("Overcast", overcast)

        # "Rain" branch: decided by Wind.
        wind = Node("Wind")
        wind_strong = Node("No")
        wind_weak = Node("Yes")
        for branch, leaf in (("Strong", wind_strong), ("Weak", wind_weak)):
            wind.add_child(branch, leaf)
        outlook.add_child("Rain", wind)

        return Tree(outlook)
Example #5
0
 def create_tree_tennis(self):
     """
     Builds, by hand, a tree matching the decision tree of the
     play_tennis.data data.

     Returns:
       tree: Tree
         Tree wrapping the "Outlook" root node.
     """
     top = Node("Outlook")

     # Sunny -> Humidity -> {High: No, Normal: Yes}
     by_humidity = Node("Humidity")
     leaf_high, leaf_normal = Node("No"), Node("Yes")
     by_humidity.add_child("High", leaf_high)
     by_humidity.add_child("Normal", leaf_normal)
     top.add_child("Sunny", by_humidity)

     # Overcast -> Yes
     leaf_overcast = Node("Yes")
     top.add_child("Overcast", leaf_overcast)

     # Rain -> Wind -> {Strong: No, Weak: Yes}
     by_wind = Node("Wind")
     leaf_strong, leaf_weak = Node("No"), Node("Yes")
     by_wind.add_child("Strong", leaf_strong)
     by_wind.add_child("Weak", leaf_weak)
     top.add_child("Rain", by_wind)

     tennis_tree = Tree(top)
     return tennis_tree
Example #6
0
 def test_get_all_descendants_empty(self):
     """A node with no children must report an empty descendant list."""
     childless = Node("Root")
     found = childless.get_all_descendants()
     self.assertListEqual(found, [])
Example #7
0
 def test_get_branches_no_children(self):
     """A node with no children must report an empty list of branches."""
     childless = Node("test")
     branches = childless.get_branches()
     self.assertListEqual(branches, [])
Example #8
0
 def create_tree(self):
     """
     Builds the play_tennis.data decision tree by hand.

     Returns:
       tree: Tree
         Tree wrapping the "Outlook" root node.
       leaf_nodes: list(Node)
         The five leaves, ordered: High/Normal humidity, Overcast,
         Strong/Weak wind.
     """
     top = Node("Outlook")

     # Sunny -> Humidity -> {High: No, Normal: Yes}
     by_humidity = Node("Humidity")
     leaf_high, leaf_normal = Node("No"), Node("Yes")
     by_humidity.add_child("High", leaf_high)
     by_humidity.add_child("Normal", leaf_normal)
     top.add_child("Sunny", by_humidity)

     # Overcast -> Yes
     leaf_overcast = Node("Yes")
     top.add_child("Overcast", leaf_overcast)

     # Rain -> Wind -> {Strong: No, Weak: Yes}
     by_wind = Node("Wind")
     leaf_strong, leaf_weak = Node("No"), Node("Yes")
     by_wind.add_child("Strong", leaf_strong)
     by_wind.add_child("Weak", leaf_weak)
     top.add_child("Rain", by_wind)

     tennis_tree = Tree(top)
     leaf_nodes = [leaf_high, leaf_normal, leaf_overcast,
                   leaf_strong, leaf_weak]
     return tennis_tree, leaf_nodes
Example #9
0
 def test_get_all_descendants_empty(self):
     """get_all_descendants on a node without children must equal []."""
     nothing_expected = []
     self.assertListEqual(
         Node("Root").get_all_descendants(), nothing_expected)
Example #10
0
    def test_get_all_descendants(self):
        """
        The descendants of the root must contain every child and
        grandchild, regardless of order.
        """
        top = Node("Root")
        kid_a, kid_b = Node("Child1"), Node("Child2")
        top.add_child("child1", kid_a)
        top.add_child("child2", kid_b)

        # Both grandchildren hang off the second child.
        grandkid_a, grandkid_b = Node("GC1"), Node("GC2")
        kid_b.add_child("child1", grandkid_a)
        kid_b.add_child("child2", grandkid_b)

        everyone_below = [kid_a, kid_b, grandkid_a, grandkid_b]
        assert_that(top.get_all_descendants(),
                    contains_inanyorder(*everyone_below))
Example #11
0
 def test_get_branches_no_children(self):
     """get_branches on a node without children must equal []."""
     no_branches = []
     self.assertListEqual(Node("test").get_branches(), no_branches)
Example #12
0
    def create_tree(self):
        """
        Hand-builds the play_tennis.data decision tree.

        Returns:
          tree: Tree
            Tree whose root is the "Outlook" node.
          leaf_nodes: list(Node)
            All leaves: the humidity leaves, then the overcast leaf, then
            the wind leaves.
        """
        outlook = Node("Outlook")

        # "Sunny" branch: decided by Humidity.
        humidity = Node("Humidity")
        humidity_leaves = [Node("No"), Node("Yes")]
        for branch, leaf in zip(("High", "Normal"), humidity_leaves):
            humidity.add_child(branch, leaf)
        outlook.add_child("Sunny", humidity)

        # "Overcast" branch: a single leaf.
        overcast = Node("Yes")
        outlook.add_child("Overcast", overcast)

        # "Rain" branch: decided by Wind.
        wind = Node("Wind")
        wind_leaves = [Node("No"), Node("Yes")]
        for branch, leaf in zip(("Strong", "Weak"), wind_leaves):
            wind.add_child(branch, leaf)
        outlook.add_child("Rain", wind)

        all_leaves = humidity_leaves + [overcast] + wind_leaves

        return Tree(outlook), all_leaves