def dt_learn(dataset, attrs, parent_dist=None):
    """Recursively build a decision tree from ``dataset``.

    Args:
        dataset: Collection of examples. Each example exposes its attribute
            values as ``example.x``, indexed by ``attr.index``.
        attrs: Attributes still available for splitting. Each attribute
            exposes ``domain`` (an iterable of its possible values) and
            ``index`` (its position in ``example.x``).
        parent_dist: ``Distribution`` built from the parent node's examples.
            It is only consulted when ``dataset`` is empty, to pick the
            label of the resulting leaf. Recursive calls always supply it.

    Returns:
        A ``Dt.Leaf`` when the recursion stops, otherwise a ``Dt.Node`` for
        the chosen attribute with one child per value in its domain.

    Raises:
        ValueError: If ``dataset`` is empty and ``parent_dist`` is ``None``,
            i.e. there is nothing to derive a leaf label from.
    """
    if not dataset:
        # No examples reached this branch: fall back to the parent's
        # most common outcome. Without a parent there is no fallback.
        if parent_dist is None:
            raise ValueError(
                "dt_learn: cannot learn from an empty dataset "
                "without a parent distribution"
            )
        return Dt.Leaf(parent_dist.get_most_common())

    dist = Distribution(dataset)
    # Stop when the distribution reports a leaf (presumably: the examples
    # are already pure -- confirm in Distribution.is_leaf) or when there is
    # no attribute left to split on.
    if dist.is_leaf() or not attrs:
        return Dt.Leaf(dist.get_most_common())

    # NOTE(review): max_gain presumably selects the attribute with the
    # highest information gain -- confirm against its definition.
    attr = max_gain(dataset, dist, attrs)
    tree = Dt.Node(attr)

    # The remaining attributes are identical for every child, so compute
    # them once rather than once per domain value. The list is only read
    # by the recursive calls.
    child_attrs = [a for a in attrs if a != attr]

    for v in attr.domain:
        # Examples whose value for the chosen attribute equals v.
        dv = [d for d in dataset if d.x[attr.index] == v]
        # Pass this node's distribution so an empty subset can still be
        # labelled with this node's most common outcome.
        subtree = dt_learn(dv, child_attrs, dist)
        tree.add_child(subtree, v)
    return tree