def Branch(dataset, default, attributes):
    """Build the subtree hanging off one branch of a decision node.

    Args:
        dataset: Samples that reached this branch.
        default: Class value used for the leaf when ``dataset`` is empty.
        attributes: Attributes handed on to ``Tree`` if the branch must be
            split further.

    Returns:
        A ``dtree.TreeLeaf`` when the branch is empty or pure, otherwise
        whatever ``Tree`` returns for the remaining samples.
    """
    # NOTE(review): ``maxdepth`` and ``Tree`` are not defined in this
    # function; they are looked up in the surrounding scope at call time --
    # confirm both exist wherever this function is used.
    if dataset:
        if dtree.allPositive(dataset):
            leaf_class = True
        elif dtree.allNegative(dataset):
            leaf_class = False
        else:
            # Mixed classes: keep splitting with one level less to spend.
            return Tree(dataset, attributes, maxdepth - 1)
        return dtree.TreeLeaf(leaf_class)

    # Nothing reached this branch, so fall back to the caller's default.
    return dtree.TreeLeaf(default)
def makeTree(set, level, attributes):
    """Recursively build a decision tree, stopping at the depth limit.

    The depth limit is read from the name ``depth`` in the surrounding
    scope (it is not a parameter of this function).

    Args:
        set: Samples to split. (The name shadows the builtin ``set``; it is
            kept because it is part of the public signature.)
        level: Current depth of the node being built.
        attributes: Attributes still available for splitting.

    Returns:
        A ``dtree.TreeLeaf`` holding the majority class when ``level`` has
        reached ``depth`` or no attributes remain; otherwise a
        ``dtree.TreeNode`` on the best attribute with one child per value.
    """
    majority = dtree.mostCommon(set)

    # Stop at the depth limit, and also when there is nothing left to split
    # on (asking for the best of zero attributes cannot succeed).
    if level >= depth or not attributes:
        return dtree.TreeLeaf(majority)

    attr = dtree.bestAttribute(set, attributes)
    # Loop-invariant: the same reduced attribute list serves every child.
    attributes_left = [a for a in attributes if a != attr]

    branches = {}
    for val in attr.values:
        subset = dtree.select(set, attr, val)
        if not subset:
            # No sample has this value: predict the parent's majority class
            # rather than letting the purity checks decide on empty data.
            child = dtree.TreeLeaf(majority)
        elif dtree.allPositive(subset):
            child = dtree.TreeLeaf(True)
        elif dtree.allNegative(subset):
            child = dtree.TreeLeaf(False)
        else:
            child = makeTree(subset, level + 1, attributes_left)
        branches[val] = child

    return dtree.TreeNode(attr, branches, majority)
def buildtree(dataset, remaining_attr, level, max_level=2):
    """Recursively build a decision tree down to a fixed level.

    Args:
        dataset: Samples to split at this node.
        remaining_attr: Attributes still available for splitting.
        level: Level of the node being built.
        max_level: Level at which recursion stops and a leaf is produced.
            Defaults to 2, the limit that was previously hard-coded.

    Returns:
        A ``dtree.TreeLeaf`` with the majority class once ``level`` reaches
        ``max_level``; otherwise a ``dtree.TreeNode`` on the attribute
        chosen by ``getMaxGain``, with one subtree per attribute value.
    """
    majority = dtree.mostCommon(dataset)

    # ``>=`` rather than ``==`` so a call that starts beyond the limit still
    # terminates instead of recursing past it.
    if level >= max_level:
        return dtree.TreeLeaf(majority)

    max_attr, _ = getMaxGain(dataset, remaining_attr)
    _remaining_attr = [a for a in remaining_attr if a != max_attr]

    # Trace output of the chosen split attribute (kept from the original).
    print(max_attr)

    branches_nodes = {
        value: buildtree(
            dtree.select(dataset, max_attr, value),
            _remaining_attr,
            level + 1,
            max_level,
        )
        for value in max_attr.values
    }

    # NOTE(review): the node default is wrapped in a TreeLeaf here, whereas
    # other builders pass the bare majority value -- confirm which form
    # consumers of TreeNode expect before changing it.
    return dtree.TreeNode(max_attr, branches_nodes, dtree.TreeLeaf(majority))
def caspersky(dataset):
    """Build a tree of at most two levels by hand and draw it.

    The root splits on the best attribute of ``dataset``. A root branch
    whose majority class is positive becomes a leaf; any other branch is
    split once more on its own best attribute, with every second-level
    child being a majority-class leaf.

    Args:
        dataset: Samples used to build the tree.

    Returns:
        None. The tree is passed to ``drawtree.drawTree``.
    """
    print("Assignment 3")

    root_attr = d.bestAttribute(dataset, m.attributes)
    branches = {}
    for value in root_attr.values:
        subset = d.select(dataset, root_attr, value)
        majority = d.mostCommon(subset)

        if majority:
            # The leaf must carry the class value, not the subset itself.
            branches[value] = d.TreeLeaf(majority)
            continue

        # Majority is not positive: split this branch one level further.
        child_attr = d.bestAttribute(subset, m.attributes)
        child_branches = {
            child_value: d.TreeLeaf(
                d.mostCommon(d.select(subset, child_attr, child_value))
            )
            for child_value in child_attr.values
        }
        branches[value] = d.TreeNode(child_attr, child_branches, majority)

    drawtree.drawTree(d.TreeNode(root_attr, branches, d.mostCommon(dataset)))
def Tree(dataset, attributes, maxdepth=3):
    """Recursively build a decision tree of bounded depth.

    Args:
        dataset: Samples to split at this node.
        attributes: Attributes still available for splitting.
        maxdepth: Number of levels that may still be added; when it is
            below 1 a leaf with the majority class is returned.

    Returns:
        A ``dtree.TreeLeaf`` or a ``dtree.TreeNode`` whose branches map each
        value of the chosen attribute to a subtree.
    """
    majority = dtree.mostCommon(dataset)
    if maxdepth < 1:
        return dtree.TreeLeaf(majority)

    def grow(subset, remaining):
        """Return the subtree for one attribute value of the current node."""
        if not subset:
            # No samples for this value: use the parent's majority class.
            return dtree.TreeLeaf(majority)
        if dtree.allPositive(subset):
            return dtree.TreeLeaf(True)
        if dtree.allNegative(subset):
            return dtree.TreeLeaf(False)
        return Tree(subset, remaining, maxdepth - 1)

    split_attr = dtree.bestAttribute(dataset, attributes)
    remaining_attrs = [attr for attr in attributes if attr != split_attr]

    children = {}
    for value in split_attr.values:
        matching = dtree.select(dataset, split_attr, value)
        children[value] = grow(matching, remaining_attrs)

    return dtree.TreeNode(split_attr, children, majority)