def plurality_value(examples):
    """Return a DecisionLeaf holding the most popular target value among
    *examples* (majority for binary targets, plurality otherwise),
    breaking ties at random via argmax_random_tie."""
    winner = argmax_random_tie(
        values[target],
        key=lambda candidate: count(target, candidate, examples))
    return DecisionLeaf(winner)
def plurality_value(examples):
    """Return a DecisionLeaf holding the most frequent target value in
    *examples*.

    Picks the first value in ``values[target]`` whose count is strictly
    greatest; the leaf holds ``None`` when every count is zero (e.g. an
    empty example set) — preserved from the original behavior.
    """
    major = None
    best = 0
    for v in values[target]:
        # Hoisted so count() runs once per candidate value, not twice
        # (the original re-evaluated it when updating the maximum).
        tally = count(target, v, examples)
        if tally > best:
            best = tally
            major = v
    return DecisionLeaf(major)
def majority_value(examples):
    """Select the most common output value among a set of examples and
    return it wrapped in a DecisionLeaf.

    Ties are broken by taking the first value in ``values[target]`` with
    the strictly greatest count (not randomly — the original docstring's
    claim of random tie-breaking did not match the code).  Returns
    ``DecisionLeaf(None)`` when every count is zero (e.g. an empty
    example set) instead of raising UnboundLocalError.
    """
    # Bug fix: `major` was previously unbound when no value had a
    # positive count, crashing with UnboundLocalError at the return.
    major = None
    best = 0
    for v in values[target]:
        # Evaluate count() once per value (the original called it twice
        # whenever the maximum was updated).
        tally = count(target, v, examples)
        if tally > best:
            best = tally
            major = v
    return DecisionLeaf(major)
def decision_tree_learning(examples, attrs, parent_examples=()):
    """Recursively build a decision tree for *examples* over the
    candidate attributes *attrs*; *parent_examples* supplies a fallback
    plurality vote when a branch receives no examples."""
    # Base cases, expressed as guard clauses.
    if not examples:
        return plurality_value(parent_examples)
    if all_same_class(examples):
        return DecisionLeaf(examples[0][target])
    if not attrs:
        return plurality_value(examples)
    # Recursive case: fork on the best attribute and grow one subtree
    # per observed value of that attribute.
    A = choose_attribute(attrs, examples)
    tree = DecisionFork(A, dataset.attrnames[A], plurality_value(examples))
    for branch_value, branch_examples in split_by(A, examples):
        child = decision_tree_learning(
            branch_examples, removeall(A, attrs), examples)
        tree.add(branch_value, child)
    return tree
def decision_tree_learning(examples, attributes, parent_examples=()):
    """Recursively grow a decision tree, additionally stopping early
    (pre-pruning) once percent_error(examples) drops below the
    module-level error_threshold."""
    # Guard clauses for the stopping conditions.
    if not examples:
        return plurality_value(parent_examples)
    if same_classification(examples):
        return DecisionLeaf(examples[0][target])
    if not attributes:
        return plurality_value(examples)
    if percent_error(examples) < error_threshold:
        # Pre-pruning: error already acceptable, stop splitting.
        return plurality_value(examples)
    # Split on the most important attribute and recurse per branch.
    best = importance(attributes, examples)
    tree = DecisionTree(best, dataset.attrnames[best])
    for branch_value, branch_examples in split_by(best, examples):
        child = decision_tree_learning(
            branch_examples, removeall(best, attributes), examples)
        tree.add(branch_value, child)
    return tree
def decision_tree_learning(examples, attributes, m, parent_examples=()):
    """Recursively learn a decision tree with pre-pruning.

    Parameters:
        examples: training rows still under consideration at this node.
        attributes: candidate attributes left to split on.
        m: pre-pruning threshold — splitting stops once
           misclass_error(examples) < m.
        parent_examples: examples at the parent node, used as the
            majority-vote fallback when this node receives no examples.

    Returns a DecisionLeaf or a DecisionTree.

    NOTE(review): this function declares ``nonlocal internal_nodes``, so
    it must be defined inside an enclosing function that binds
    ``internal_nodes`` — confirm against the enclosing scope, which is
    not visible here.
    """
    if len(examples) == 0:
        # No examples reached this branch: fall back to the parent's majority.
        return majority_value(parent_examples)
    elif same_classification(examples):
        # Every remaining example has the same class: emit a pure leaf.
        return DecisionLeaf(examples[0][target])
    elif len(attributes) == 0:
        # Nothing left to split on: take the majority vote.
        return majority_value(examples)
    elif misclass_error(examples) < m:
        # Pre-pruning: error already below the threshold m, stop growing.
        return majority_value(examples)
    else:
        A = pick_attribute(attributes, examples)
        tree = DecisionTree(A, dataset.attrnames[A])
        nonlocal internal_nodes
        internal_nodes += 1  # one more internal (non-leaf) node created
        for (val_i, exs_i) in split(A, examples):
            subtree = decision_tree_learning(exs_i, removeall(A, attributes), m, examples)
            tree.add(val_i, subtree)
        return tree
def decision_tree_learning_continuos(examples, attributes, parent_examples=()):
    """Learn a decision tree over continuous-valued attributes: every
    fork tests attribute <= threshold (branch flag False) against
    attribute > threshold (branch flag True)."""
    # Stopping conditions as guard clauses.
    if not examples:
        return majority_value(parent_examples)
    if same_classification(examples):
        return DecisionLeaf(examples[0][target])
    if not attributes:
        return majority_value(examples)
    # Choose the attribute and split point, then recurse on both halves.
    attr, threshold = pick_attribute_continuos(attributes, examples)
    tree = DecisionTreeContinuos(attr, threshold, dataset.attrnames[attr])
    below, above = split_continuous(attr, threshold, examples)
    # Branch for values <= threshold.
    tree.add(threshold, False,
             decision_tree_learning_continuos(
                 below, removeall(attr, attributes), examples))
    # Branch for values > threshold.
    tree.add(threshold, True,
             decision_tree_learning_continuos(
                 above, removeall(attr, attributes), examples))
    return tree
def plurality_value(examples):
    """Return the most popular target value for this set of examples,
    as a DecisionLeaf; ties are resolved by argmax_random_tie."""
    def frequency(candidate):
        # How often this candidate value appears as the target.
        return count(target, candidate, examples)

    return DecisionLeaf(argmax_random_tie(values[target], key=frequency))