def test_post_process_node_many_children():
    """Post-processing a node with three leaf children yields that same node.

    The node's ``to_list()`` output must also be the same before and after
    the call.
    """
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    parent = InternalNode(leaves)
    listing_before = parent.to_list()

    processed = builder._post_process_node(parent, False)

    assert processed == [parent]
    assert processed[0].to_list() == listing_before
def test_build_many_leaf_node_children():
    """Building from one internal node with three leaves keeps all three."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    builder.add_child(InternalNode.build(leaves))

    built = builder.build()

    # The root must be exactly an InternalNode holding every original leaf.
    assert type(built) is InternalNode
    assert len(built.get_children()) == len(leaves)
    for leaf in leaves:
        assert leaf in built.get_children()
def test_build_one_leaf_node_child():
    """A builder holding a single leaf returns that very leaf from build()."""
    builder = Builder()
    only_leaf = LeafNode.build(Variable.build('a'))
    builder.add_child(only_leaf)

    built = builder.build()

    assert built is only_leaf
def test_simplify_node_with_many_children():
    """Leaves wrapped in single-child internal nodes end up directly under the root."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]
    # Each leaf is added wrapped in its own one-child internal node.
    for leaf in leaves:
        builder.add_child(InternalNode.build([leaf]))

    built = builder.build()

    assert type(built) is InternalNode
    assert len(built.get_children()) == len(leaves)
    for leaf in leaves:
        assert leaf in built.get_children()
def test_build_one_child_with_leaf_node_grandchild():
    """A lone leaf wrapped in one internal node is what build() returns."""
    builder = Builder()
    leaf = LeafNode.build(Variable.build('a'))
    wrapper = InternalNode.build([leaf])
    builder.add_child(wrapper)

    built = builder.build()

    assert built is leaf
def build_tree(self, rows, parent=None):
    """Recursively build a decision tree from ``rows``.

    Args:
        rows: The data to split; passed to ``find_best_split``,
            ``label_counts`` and ``partition``.
        parent: Accepted for interface compatibility; not used here.

    Returns:
        A ``LeafNode`` wrapping the label counts when the best split has
        zero gain, otherwise a ``DecisionNode`` holding the chosen question
        and the recursively built true/false branches.
    """
    info_gain, question = self.find_best_split(rows)

    # Zero gain: stop recursing and summarise the rows in a leaf.
    if info_gain == 0:
        return LeafNode(self.label_counts(rows))

    matching_rows, other_rows = self.partition(rows, question)
    # Arguments are evaluated left to right, so the true branch is built first.
    return DecisionNode(
        question,
        self.build_tree(matching_rows),
        self.build_tree(other_rows),
    )
def parse(self, token_list):
    """Try to consume the first token of ``token_list``.

    Args:
        token_list: Tokens still to be parsed; the first one is tested with
            its ``matches(self)`` method.

    Returns:
        ``FAILURE`` when the list is empty or the first token does not
        match; otherwise a ``ParseState`` built from a leaf node for that
        token and the remaining tokens.
    """
    if token_list == [] or not token_list[0].matches(self):
        return FAILURE

    leaf = LeafNode.build(token_list[0])
    remaining_tokens = token_list[1:]
    return ParseState.build(leaf, remaining_tokens)
def _create_subtree(self, data_list, avaliable_attributes):
    """Recursively build the subtree for ``data_list``.

    Args:
        data_list: The objects to classify at this point of the tree.
        avaliable_attributes: Collection of attribute numbers that may still
            be used for splitting. It must support ``copy()`` and
            ``remove()``. It is NOT modified by this method.

    Returns:
        A ``LeafNode`` when no attributes remain (labelled with the class
        whose frequency is highest) or when ``data_list`` is pure;
        otherwise an ``AttributeNode`` for the chosen attribute holding one
        ``(attribute_value, subtree)`` pair per split subset.
    """
    if not avaliable_attributes:
        # Nothing left to split on: fall back to the most frequent class.
        # class_frequencies is iterated as (class_name, frequency) pairs.
        class_frequencies = self._calculate_class_frequencies(data_list)
        class_name, _ = max(
            class_frequencies,
            key=lambda class_frequency_tuple: class_frequency_tuple[1])
        return LeafNode(class_name)

    if self._is_pure(data_list):
        return LeafNode(self._get_first_object_class_name(data_list))

    attribute_number = self._get_attribute_to_split_on(
        data_list, avaliable_attributes)
    attribute_values_with_subsets = self._split_set(
        data_list, attribute_number)

    # Work on a private copy so the caller's collection is left untouched.
    # Previously the attribute was removed from the caller's own object,
    # which silently shrank the list passed in by the outermost caller.
    remaining_attributes = avaliable_attributes.copy()
    remaining_attributes.remove(attribute_number)

    # Each recursive call copies before removing, so the children can
    # safely share ``remaining_attributes``.
    attribute_value_with_node_pairs = [
        (attribute_value, self._create_subtree(subset, remaining_attributes))
        for attribute_value, subset in attribute_values_with_subsets
    ]
    return AttributeNode(attribute_number, attribute_value_with_node_pairs)
def test_simplify_acts_on_all_children():
    """simplify() replaces every single-child wrapper with its leaf."""
    builder = Builder()
    leaves = [LeafNode.build(Variable.build(name)) for name in ('a', 'b', 'c')]

    wrappers = []
    for leaf in leaves:
        wrapper = InternalNode.build([leaf])
        builder.add_child(wrapper)
        wrappers.append(wrapper)

    builder.simplify()

    # After simplifying, the builder holds the leaves and none of the wrappers.
    for leaf in leaves:
        assert leaf in builder._children
    for wrapper in wrappers:
        assert wrapper not in builder._children
def combine_leaves(self):
    """Merge uniform subtrees into single leaves and collect targets.

    Each child's own ``combine_leaves()`` result is inspected. When a child
    reports more than one target and they are all equal, that child is
    replaced in ``self.child`` by a ``LeafNode`` for the shared target, and
    the target is recorded once. Otherwise every target the child reported
    is recorded.

    Returns:
        The list of targets gathered from all children, in iteration order.
    """
    collected = []
    for key in self.child:
        subtree_targets = self.child[key].combine_leaves()
        all_same = len(set(subtree_targets)) == 1
        if all_same and len(subtree_targets) != 1:
            # Several leaves, one distinct target: collapse the subtree.
            shared_target = subtree_targets[0]
            self.child[key] = LeafNode(shared_target)
            collected.append(shared_target)
        else:
            collected.extend(subtree_targets)
    return collected
def __create_child_node_objects(self, is_in_last_internal_node_row,
                                next_parameter_info):
    """Create the child nodes (Internal/Leaf) of the current node.

    Generates LeafNode children when ``is_in_last_internal_node_row`` is
    True, and InternalNode children (one level deeper than this node)
    otherwise.

    Every child receives its own copy of this node's parameter-to-value
    mapping, extended with the next parameter in the hierarchy. Adjacent
    children differ in the value of that parameter by its step size.

    Args:
        is_in_last_internal_node_row: A boolean indicating whether the
            current node occupies the last row of internal nodes in the
            tree.
        next_parameter_info: A dictionary describing the next parameter in
            the hierarchy. The keys read here are:
              - "name": the parameter's name
              - "min": the minimum value (inclusive)
              - "max": the maximum value (inclusive)
              - "step_size": the step between consecutive values

    Returns:
        A tuple ``(child_nodes, count)`` holding the list of newly created
        child nodes and its length.
    """
    child_nodes = []
    for next_parameter_value in range(next_parameter_info["min"],
                                      next_parameter_info["max"] + 1,
                                      next_parameter_info["step_size"]):
        # Build a fresh mapping per child. Reusing one dict across
        # iterations would make every child alias the same object, so all
        # of them would end up with the last parameter value.
        child_node_parameter_to_value = self.parameter_to_value.copy()
        child_node_parameter_to_value[
            next_parameter_info["name"]] = next_parameter_value

        if is_in_last_internal_node_row:
            child_nodes.append(LeafNode(child_node_parameter_to_value))
        else:
            child_nodes.append(
                InternalNode(self.depth_in_tree + 1,
                             child_node_parameter_to_value))
    return child_nodes, len(child_nodes)
def build(self, ex_train, de_train):
    """Grow this decision node and its subtree from training data.

    Args:
        ex_train: Feature table, used like a pandas DataFrame (column
            indexing, ``.columns``, ``.shape``, boolean-mask row selection).
        de_train: Target table whose first column holds the class labels.
            Boolean masks computed from ``ex_train`` are applied to it, so
            it is expected to be row-aligned with ``ex_train``.

    Returns:
        A ``LeafNode`` when this node is not the root and the data cannot
        be split any further. Otherwise ``None``, with ``self.node`` and
        ``self.child`` filled in place.
    """
    self.ex = ex_train
    self.de = de_train

    # End case 1: only one class is present in the target data.
    class_array = de_train.iloc[:, 0].unique()
    if len(class_array) == 1:
        if not self.Isroot:
            return LeafNode(class_array[0])
        # The root keeps itself as the node and hangs the single leaf
        # under the 'end' key.
        self.node = ex_train.columns[0]
        self.child['end'] = LeafNode(class_array[0])
        # NOTE(review): there is no return here, so a pure root still
        # falls through to the gain computation below; confirm intended.

    # Find the column that has the best information gain.
    _, column = self.compute_gain(ex_train, de_train)

    # End case 2: no feature is worth splitting on, so finish with a leaf.
    if column == 'none':
        # NOTE(review): max() picks the largest label value, not the most
        # frequent one; confirm this is the intended tie-break.
        fallback_leaf = LeafNode(max(de_train.iloc[:, 0]))
        if not self.Isroot:
            return fallback_leaf
        self.child['end'] = fallback_leaf
        return

    self.node = column

    # Split the data on every distinct value of the chosen column.
    for value in ex_train[column].unique():
        row_mask = ex_train[column] == value
        # Drop the split column from the partition.
        parted_data = self.drop_col(ex_train[row_mask], column)
        parted_target = de_train[row_mask]
        class_array = parted_target.iloc[:, 0].unique()
        # Total number of distinct values over the remaining columns.
        distinct_value_total = sum(
            len(parted_data[name].unique()) for name in parted_data)

        if len(class_array) == 1:
            # End case 3: the group has a single target class.
            self.child[value] = LeafNode(class_array[0])
        elif distinct_value_total == parted_data.shape[1]:
            # End case 4: every remaining column holds one value, so no
            # further split can separate the rows.
            self.child[value] = LeafNode(max(parted_target.iloc[:, 0]))
        else:
            # No end case applies: build a subtree for this value.
            subtree = DecisionNode()
            leaf = subtree.build(parted_data, parted_target)
            # build() returns a LeafNode when a non-root node cannot be
            # split. That leaf used to be discarded, leaving an empty
            # DecisionNode in the tree, so store it when present.
            self.child[value] = subtree if leaf is None else leaf
def test_post_process_node_one_child():
    """Nested single-child internal nodes are reduced to the innermost leaf."""
    builder = Builder()
    innermost_leaf = LeafNode.build(Variable.build('a'))
    inner = InternalNode.build([innermost_leaf])
    outer = InternalNode.build([inner])

    processed = builder._post_process_node(outer, False)

    assert processed == [innermost_leaf]
def test_simplify_node_with_leaf_node():
    """Simplifying a bare leaf returns a one-element list holding that leaf."""
    builder = Builder()
    leaf = LeafNode.build(Variable.build('a'))

    simplified = builder._simplify_node(leaf)

    assert simplified == [leaf]