def decision_tree_learning(examples, attributes, binary_targets):
    """Recursively build a binary decision tree (ID3-style DTL algorithm).

    Args:
        examples: list of attribute-value rows; row values are indexed by
            ``attribute - 1`` (attributes are 1-based).
        attributes: list of candidate attribute numbers still available.
        binary_targets: list of 0/1 labels, parallel to ``examples``.

    Returns:
        A TreeNode: a leaf when the labels are pure or no attributes remain,
        otherwise an internal node with one kid per attribute value (0, 1).
    """
    # Base case 1: all labels agree -> pure leaf with zero entropy.
    if same_binary_targets(binary_targets):
        return TreeNode.create_leaf(binary_targets[0], 0)

    # Base case 2: no attributes left -> majority leaf with label entropy.
    if len(attributes) == 0:
        counter = collections.Counter(binary_targets)
        return TreeNode.create_leaf(majority_value(binary_targets),
                                    get_entropy(counter[1], counter[0]))

    best_attribute = choose_best_decision_attribute(examples, attributes,
                                                    binary_targets)
    tree = TreeNode.create_internal(best_attribute)

    # Recurse on a filtered copy instead of mutating the caller's list with
    # remove()/append(), which reordered it as a side effect.
    remaining = [a for a in attributes if a != best_attribute]

    for v in (0, 1):
        # Partition the examples (and their parallel labels) by the value
        # of the best attribute; attribute numbers are 1-based.
        v_examples = []
        v_binary_targets = []
        for example, target in zip(examples, binary_targets):
            if example[best_attribute - 1] == v:
                v_examples.append(example)
                v_binary_targets.append(target)

        if len(v_examples) == 0:
            # BUG FIX: the original `return`ed a lone leaf here, throwing
            # away `tree` (and any subtree already attached) and replacing
            # the entire subtree with that leaf. The DTL algorithm instead
            # attaches a majority-value leaf for the empty branch and
            # continues with the other value.
            counter = collections.Counter(binary_targets)
            tree.add_kid(TreeNode.create_leaf(majority_value(binary_targets),
                                              get_entropy(counter[1],
                                                          counter[0])))
        else:
            tree.add_kid(decision_tree_learning(v_examples, remaining,
                                                v_binary_targets))
    return tree
def test_debug():
    """Build a small hand-crafted tree and dump it, for visual debugging.

    Shape built by the code below:
        1
        |-2
        |-3
        |  |-4
        |  |  |-5
        |-6
    (NOTE(review): the original comment drew 5 as a sibling of 4, but the
    code attaches 5 as a kid of 4 — the diagram above matches the code.)
    """
    node_four = TreeNode.create_internal("4")
    node_four.add_kid(TreeNode.create_leaf("5"))

    node_three = TreeNode.create_internal("3")
    node_three.add_kid(node_four)

    root = TreeNode.create_internal("1")
    root.add_kid(TreeNode.create_leaf("2"))
    root.add_kid(node_three)
    root.add_kid(TreeNode.create_leaf("6"))

    dump_tree("Test", root)