예제 #1
0
def dt_learn(dataset, attrs, parent_dist=None):
    """Recursively build a decision tree from ``dataset``.

    Args:
        dataset: Collection of samples. Each sample is read as
            ``sample.x[attr.index]`` to obtain its value for an attribute.
        attrs: Attributes still available for splitting. Each one exposes
            ``domain`` (the values to branch on) and ``index``.
        parent_dist: Distribution of the parent node's samples; used only
            when ``dataset`` is empty. Recursive calls always supply it.

    Returns:
        A ``Dt.Leaf`` when ``dataset`` is empty, when ``dist.is_leaf()`` is
        true, or when no attributes remain; otherwise a ``Dt.Node`` for the
        attribute chosen by ``max_gain`` with one child per domain value.

    Raises:
        ValueError: If ``dataset`` is empty and ``parent_dist`` is None,
            i.e. there is nothing to derive a label from.
    """
    if not dataset:
        # Without a parent distribution there is no fallback label; fail
        # with a clear message instead of an AttributeError on None.
        if parent_dist is None:
            raise ValueError(
                "dt_learn: cannot learn from an empty dataset "
                "without a parent distribution"
            )
        return Dt.Leaf(parent_dist.get_most_common())

    dist = Distribution(dataset)
    # NOTE(review): is_leaf() presumably means no further split is useful
    # (e.g. all samples share one label) -- confirm against Distribution.
    if dist.is_leaf() or not attrs:
        return Dt.Leaf(dist.get_most_common())

    # NOTE(review): max_gain presumably selects the attribute with the
    # highest information gain -- confirm against its definition.
    attr = max_gain(dataset, dist, attrs)
    tree = Dt.Node(attr)

    # The remaining attributes are the same for every branch, so compute
    # them once rather than once per domain value.
    child_attrs = [a for a in attrs if a != attr]
    for v in attr.domain:
        dv = [d for d in dataset if d.x[attr.index] == v]
        # An empty partition falls back to this node's distribution.
        tree.add_child(dt_learn(dv, child_attrs, dist), v)
    return tree