def build_decison_tree(attr_list, df1, heuristic):
    """Recursively build a binary decision tree for the rows in ``df1``.

    Args:
        attr_list: Candidate attributes that may still be used for splitting.
        df1: Data frame holding the rows that reach this node. The split
            attribute's column is compared against the values 0 and 1.
        heuristic: Passed through to ``info_gain`` and also used as the
            index into its result to pick the attribute to split on.

    Returns:
        The ``TreeNode`` rooting the subtree built for ``df1``.
    """
    node = TreeNode()
    class_counts = count(df1)
    # Label 1 wins only on a strict majority of class_counts[0]; ties give 0.
    # presumably class_counts is (positives, negatives) - verify against count()
    majority_label = 1 if class_counts[0] > class_counts[1] else 0
    node.add_majority_class(majority_label)

    # Pure node: every row was counted in the same bucket.
    if class_counts[0] == len(df1.index):
        node.classLabel = 1
        return node
    if class_counts[1] == len(df1.index):
        node.classLabel = 0
        return node

    # Nothing left to split on: fall back to the majority label.
    if len(attr_list) == 0:
        node.classLabel = majority_label
        return node

    entropy = entropyFn(df1)
    variance_impurity = varianceimpurityFn(df1)
    candidates = info_gain(entropy, variance_impurity, attr_list, df1,
                           heuristic)
    split_attr = candidates[heuristic]

    # No attribute was selected: make this node a majority-label leaf.
    if split_attr is None:
        node.classLabel = majority_label
        return node

    node.add_attribute(split_attr)
    remaining_attrs = [attr for attr in attr_list if attr != split_attr]

    # Value 0 goes to the left child, value 1 to the right child.
    for value, attach_child in ((0, node.add_left), (1, node.add_right)):
        subset = df1[df1[split_attr] == value]
        if len(subset.index) != 0:
            child = build_decison_tree(remaining_attrs, subset, heuristic)
        else:
            # Empty partition: use a leaf carrying the parent's majority label.
            child = TreeNode()
            child.classLabel = majority_label
        attach_child(child)

    return node
def copy_tree(root):
    """Return a recursive copy of the tree rooted at ``root``.

    Every copied node is also appended to the module-level ``node_list``.
    Children are appended before their parent, because the append happens
    after both subtrees have been copied.

    Args:
        root: The ``TreeNode`` to copy, or ``None``.

    Returns:
        A new ``TreeNode`` mirroring ``root``, or ``None`` when ``root`` is
        ``None``.
    """
    # Guard first, so a missing root never reaches add_node_attributes and
    # no throwaway node is allocated.
    if root is None:
        return None

    new_root = TreeNode()
    # presumably copies the per-node fields from root - see TreeNode
    new_root.add_node_attributes(root)

    if root.left is not None:
        new_root.add_left(copy_tree(root.left))
    if root.right is not None:
        new_root.add_right(copy_tree(root.right))

    node_list.append(new_root)
    return new_root