Example #1
0
def test_post_process_node_many_children():
    """Post-processing a node with several children returns that node unchanged."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    node = InternalNode(leaves)
    # Snapshot the structure up front so any in-place change is detected.
    expected_structure = node.to_list()

    processed = builder._post_process_node(node, False)

    assert processed == [node]
    assert processed[0].to_list() == expected_structure
Example #2
0
def test_build_many_leaf_node_children():
    """Building from one internal node with three leaves keeps all three."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    builder.add_child(InternalNode.build(leaves))

    tree = builder.build()

    assert type(tree) is InternalNode
    assert len(tree.get_children()) == len(leaves)
    assert all(leaf in tree.get_children() for leaf in leaves)
Example #3
0
def test_build_one_leaf_node_child():
    """A builder holding a single leaf builds to that very leaf object."""
    builder = Builder()
    only_leaf = LeafNode.build(Variable.build('a'))
    builder.add_child(only_leaf)

    built = builder.build()

    # Identity, not just equality: the same object must come back.
    assert built is only_leaf
Example #4
0
def test_simplify_node_with_many_children():
    """Single-child wrappers added to the builder end up replaced by their leaves."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    for leaf in leaves:
        # Each leaf is wrapped in its own one-child internal node.
        builder.add_child(InternalNode.build([leaf]))

    tree = builder.build()

    assert type(tree) is InternalNode
    assert len(tree.get_children()) == len(leaves)
    assert all(leaf in tree.get_children() for leaf in leaves)
Example #5
0
def test_build_one_child_with_leaf_node_grandchild():
    """A lone internal node wrapping a lone leaf builds down to that leaf."""
    builder = Builder()
    leaf = LeafNode.build(Variable.build('a'))
    builder.add_child(InternalNode.build([leaf]))

    built = builder.build()

    # Identity check: the original leaf object itself must be returned.
    assert built is leaf
Example #6
0
 def build_tree(self, rows, parent=None):
     '''Recursively grow a tree from ``rows``.

     Asks ``find_best_split`` for the best question. When the reported
     gain is 0 the rows become a ``LeafNode`` holding their label counts;
     otherwise the rows are partitioned on the question and a
     ``DecisionNode`` is built over the two recursively built branches.

     ``parent`` is accepted but not used by this method.
     '''
     info_gain, question = self.find_best_split(rows)
     if info_gain != 0:
         matching_rows, other_rows = self.partition(rows, question)
         # Arguments are evaluated left to right: true branch first.
         return DecisionNode(question,
                             self.build_tree(matching_rows),
                             self.build_tree(other_rows))
     # No split improves on the current rows: stop here.
     return LeafNode(self.label_counts(rows))
Example #7
0
    def parse(self, token_list):
        """Try to consume the first token of ``token_list``.

        Returns a ``ParseState`` built from a leaf wrapping the first token
        and the remaining tokens when the list is non-empty and its first
        token matches this object; returns ``FAILURE`` otherwise.
        """
        if token_list != [] and token_list[0].matches(self):
            return ParseState.build(LeafNode.build(token_list[0]),
                                    token_list[1:])
        return FAILURE
Example #8
0
 def _create_subtree(self, data_list, avaliable_attributes):
     """Recursively build the subtree for ``data_list``.

     Args:
         data_list: The objects to classify at this level of the tree.
         avaliable_attributes: Collection of attribute numbers that may
             still be used for splitting. It is not modified.

     Returns:
         A ``LeafNode`` when no attributes remain (labelled with the class
         whose frequency is highest) or when ``data_list`` is pure;
         otherwise an ``AttributeNode`` holding one
         ``(attribute_value, subtree)`` pair per subset of the split.
     """
     if not avaliable_attributes:
         # Nothing left to split on: fall back to the most frequent class.
         class_frequencies = self._calculate_class_frequencies(data_list)
         class_name, _ = max(class_frequencies, key=lambda pair: pair[1])
         return LeafNode(class_name)
     if self._is_pure(data_list):
         return LeafNode(self._get_first_object_class_name(data_list))

     attribute_number = self._get_attribute_to_split_on(
         data_list, avaliable_attributes)
     attribute_values_with_subsets = self._split_set(
         data_list, attribute_number)

     # Work on a private copy so the caller's collection is left intact.
     remaining_attributes = avaliable_attributes.copy()
     remaining_attributes.remove(attribute_number)

     # This method never mutates its argument, so the recursive calls can
     # safely share ``remaining_attributes``.
     attribute_value_with_node_pairs = [
         (attribute_value,
          self._create_subtree(subset, remaining_attributes))
         for attribute_value, subset in attribute_values_with_subsets
     ]
     return AttributeNode(attribute_number, attribute_value_with_node_pairs)
Example #9
0
def test_simplify_acts_on_all_children():
    """simplify() swaps every single-child wrapper for the leaf it wraps."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    wrappers = []
    for leaf in leaves:
        wrapper = InternalNode.build([leaf])
        builder.add_child(wrapper)
        wrappers.append(wrapper)

    builder.simplify()

    # Every leaf is now a direct child, and no wrapper remains.
    assert all(leaf in builder._children for leaf in leaves)
    assert not any(wrapper in builder._children for wrapper in wrappers)
Example #10
0
 def combine_leaves(self):
     """Merge uniform subtrees into leaves and collect the targets below.

     Calls ``combine_leaves`` on every child. When a child reports more
     than one target and they are all equal, that child is replaced by a
     ``LeafNode`` of the shared target and the target is recorded once.
     Otherwise all of the child's targets are recorded as reported.

     Returns:
         The list of targets collected from all children.
     """
     collected = []
     for key in self.child:
         child_targets = self.child[key].combine_leaves()
         if len(set(child_targets)) == 1 and len(child_targets) != 1:
             shared_target = child_targets[0]
             # Reassigning an existing key is safe while iterating the dict.
             self.child[key] = LeafNode(shared_target)
             collected.append(shared_target)
         else:
             collected.extend(child_targets)
     return collected
    def __create_child_node_objects(self, is_in_last_internal_node_row,
                                    next_parameter_info):
        """Performs creation of child nodes (Internal/Leaf).

        Generates LeafNode child nodes if the current node is in the last
        row of internal nodes of the tree, i.e. when
        is_in_last_internal_node_row is True. Generates InternalNode child
        nodes (one level deeper than this node) otherwise.

        The parameter-to-value mapping of each child is a copy of this
        node's mapping plus one additional entry: NEXT_PARAMETER, the
        parameter named by next_parameter_info["name"]. Adjacent child
        nodes differ in the value of NEXT_PARAMETER by STEP_SIZE.

        Args:
            is_in_last_internal_node_row: A boolean indicating whether the
                current node occupies the last row of internal nodes in
                the tree.
            next_parameter_info: A dictionary describing the next parameter
                in the hierarchy. The keys read here are:
                    "name"      - name of the parameter
                    "min"       - first value generated
                    "max"       - last value that may be generated
                                  (inclusive)
                    "step_size" - distance between consecutive values

        Returns:
            A tuple (child_nodes, count): the list of newly created child
            nodes (Internal/Leaf) and its length.
        """

        child_nodes = []

        for next_parameter_value in range(next_parameter_info["min"],
                                          next_parameter_info["max"] + 1,
                                          next_parameter_info["step_size"]):
            # Give every child its own mapping. Reusing one dict across
            # iterations would make all children alias the same object and
            # therefore all see the value written for the last child.
            child_node_parameter_to_value = self.parameter_to_value.copy()
            child_node_parameter_to_value[
                next_parameter_info["name"]] = next_parameter_value

            if is_in_last_internal_node_row:
                child_nodes.append(LeafNode(child_node_parameter_to_value))
            else:
                child_nodes.append(
                    InternalNode(self.depth_in_tree + 1,
                                 child_node_parameter_to_value))

        return child_nodes, len(child_nodes)
Example #12
0
    def build(self, ex_train, de_train):
        """Build the decision (sub)tree rooted at this node.

        Args:
            ex_train: Table of explanatory columns (accessed through
                ``.columns``, ``[column]`` and boolean-mask indexing;
                presumably a pandas DataFrame).
            de_train: Table whose first column holds the target classes
                (accessed through ``.iloc[:, 0]``).

        Returns:
            A ``LeafNode`` when this node is not the root and collapses to
            a single prediction (end cases 1 and 2). Otherwise ``None``,
            with ``self.node`` and ``self.child`` populated in place.
        """
        self.ex = ex_train
        self.de = de_train

        # End case 1
        # If there is only one class in the target data, return leafnode
        class_array = de_train.iloc[:, 0].unique()
        if len(class_array) == 1:
            if not self.Isroot:
                return LeafNode(class_array[0])
            # The root cannot be replaced by a leaf, so it keeps a single
            # 'end' child and stops here, exactly like end case 2 does.
            self.node = ex_train.columns[0]
            self.child['end'] = LeafNode(class_array[0])
            return

        # find the column that has the best information gain
        _gain, column = self.compute_gain(ex_train, de_train)

        # End case 2
        # Check if there are any features worth parting the data on left;
        # if not, end with a leaf node.
        # NOTE(review): max() picks the largest label value, not the most
        # frequent one - confirm this is the intended fallback.
        if column == 'none':
            fallback_leaf = LeafNode(max(de_train.iloc[:, 0]))
            if not self.Isroot:
                return fallback_leaf
            self.child['end'] = fallback_leaf
            return

        self.node = column
        # A useful column exists, so part the data on each of its values.
        for value in ex_train[column].unique():
            row_mask = ex_train[column] == value
            # drop extra columns
            parted_data = self.drop_col(ex_train[row_mask], column)
            parted_target = de_train[row_mask]

            class_array = parted_target.iloc[:, 0].unique()
            unique_counts = [len(parted_data[name].unique())
                             for name in parted_data]

            # End case 3
            # if a group has only one target class, append a leaf node
            if len(class_array) == 1:
                self.child[value] = LeafNode(class_array[0])
            # End case 4
            # if every remaining column holds a single value in this group
            # there is nothing left to split on: append a leaf node
            elif sum(unique_counts) == parted_data.shape[1]:
                self.child[value] = LeafNode(max(parted_target.iloc[:, 0]))
            else:
                # if all end cases are false, create a DecisionNode and
                # build its branches.
                subtree = DecisionNode()
                collapsed = subtree.build(parted_data, parted_target)
                # build() returns a LeafNode when the child itself hits an
                # end case; use that leaf instead of the empty DecisionNode.
                self.child[value] = subtree if collapsed is None else collapsed
Example #13
0
def test_post_process_node_one_child():
    """Nested single-child wrappers are reduced to the innermost leaf."""
    builder = Builder()
    innermost_leaf = LeafNode.build(Variable.build('a'))
    wrapped = InternalNode.build([InternalNode.build([innermost_leaf])])

    processed = builder._post_process_node(wrapped, False)

    assert processed == [innermost_leaf]
Example #14
0
def test_simplify_node_with_leaf_node():
    """Simplifying a bare leaf yields a one-element list holding that leaf."""
    builder = Builder()
    leaf = LeafNode.build(Variable.build('a'))

    simplified = builder._simplify_node(leaf)

    assert simplified == [leaf]