def Branch(dataset, default, attributes):
    """Build the subtree (or leaf) for one branch of a decision tree.

    Args:
        dataset: Samples routed to this branch.
        default: Label used for the leaf when ``dataset`` is empty.
        attributes: Attributes forwarded unchanged to ``Tree``.

    Returns:
        ``dtree.TreeLeaf(default)`` for an empty dataset,
        ``dtree.TreeLeaf(True)`` / ``dtree.TreeLeaf(False)`` when
        ``dtree.allPositive`` / ``dtree.allNegative`` is truthy for the
        dataset, otherwise whatever ``Tree`` returns.
    """
    # NOTE(review): `Tree` and `maxdepth` are not defined in this block;
    # presumably they come from an enclosing scope -- confirm.
    if dataset:
        if dtree.allPositive(dataset):
            return dtree.TreeLeaf(True)
        if dtree.allNegative(dataset):
            return dtree.TreeLeaf(False)
        # Mixed labels: keep splitting with one level less of depth budget.
        return Tree(dataset, attributes, maxdepth - 1)
    return dtree.TreeLeaf(default)
def buildTree(subset, attrs):
    """Append a textual encoding of the decision tree for ``subset`` to ``tree``.

    The result is accumulated in the module-level string ``tree``: a leaf
    contributes ``'+'`` or ``'-'``, an inner node contributes
    ``str(attribute)`` followed by its children wrapped in parentheses.

    Args:
        subset: Samples to encode.
        attrs: Candidate attributes; must support ``-`` with a set.

    Returns:
        None. The output is the side effect on the global ``tree``.
    """
    global tree

    if isLeaf(subset):
        tree += '+' if d.allPositive(subset) else '-'
        return

    split_attr = d.bestAttribute(subset, attrs)
    tree += str(split_attr) + "("
    for val in split_attr.values:
        # Each child sees the candidate attributes minus the one just used.
        buildTree(d.select(subset, split_attr, val), attrs - {split_attr})
    tree += ")"
def makeTree(set, level, attributes):
    """Recursively build a decision tree for the samples in ``set``.

    Args:
        set: Samples to split. The name shadows the builtin but is kept
            because it is part of the existing call signature.
        level: Current depth; recursion stops once it reaches ``depth``.
        attributes: Candidate attributes for the split at this node.

    Returns:
        A ``dtree.TreeLeaf`` holding ``dtree.mostCommon(set)`` when the
        depth limit is reached, otherwise a ``dtree.TreeNode`` on the
        attribute chosen by ``dtree.bestAttribute`` with one branch per
        attribute value.
    """
    # NOTE(review): `depth` is not defined in this block; presumably a
    # module-level depth limit -- confirm.
    if level >= depth:
        return dtree.TreeLeaf(dtree.mostCommon(set))

    attr = dtree.bestAttribute(set, attributes)
    majority = dtree.mostCommon(set)
    # Loop-invariant: the chosen attribute is excluded for every child.
    attributes_left = [a for a in attributes if a != attr]

    branches = {}
    for val in attr.values:
        subset = dtree.select(set, attr, val)
        if not subset:
            # No training sample takes this value: fall back to the parent's
            # majority label instead of letting the purity checks decide on
            # an empty partition.
            child = dtree.TreeLeaf(majority)
        elif dtree.allPositive(subset):
            child = dtree.TreeLeaf(True)
        elif dtree.allNegative(subset):
            child = dtree.TreeLeaf(False)
        else:
            child = makeTree(subset, level + 1, attributes_left)
        branches[val] = child

    return dtree.TreeNode(attr, branches, majority)
def buildTreeRec(dataset, attributes, depthtodo):
    """Recursively build a decision tree with a remaining depth budget.

    Args:
        dataset: Samples to split at this node.
        attributes: Indexable collection of attributes; passed unchanged to
            every recursive call.
        depthtodo: Remaining number of levels; a value <= 0 forces a leaf.

    Returns:
        ``d.TreeLeaf(True)`` / ``d.TreeLeaf(False)`` when ``d.allPositive``
        / ``d.allNegative`` is truthy for the dataset, a leaf holding
        ``d.mostCommon(dataset)`` when the depth budget is exhausted, and
        otherwise a ``d.TreeNode`` with one child per value of the chosen
        attribute.
    """
    majority = d.mostCommon(dataset)

    if d.allPositive(dataset):
        return d.TreeLeaf(True)
    if d.allNegative(dataset):
        return d.TreeLeaf(False)
    if depthtodo <= 0:
        return d.TreeLeaf(majority)

    gain_pairs = calculateGainTuplesForAllAttributes(dataset, attributes)
    # Only the second element (a key/index into `attributes`) is used.
    _, best_key = getTupleWithMaxGainValue(gain_pairs)
    split_attr = attributes[best_key]

    children = {
        value: buildTreeRec(
            d.select(dataset, split_attr, value), attributes, depthtodo - 1
        )
        for value in split_attr.values
    }
    return d.TreeNode(split_attr, children, majority)
def isLeaf(subset):
    """Tell whether ``subset`` needs no further splitting.

    Returns the result of ``d.allPositive(subset)`` when it is truthy,
    otherwise the result of ``d.allNegative(subset)``.
    """
    positive = d.allPositive(subset)
    if positive:
        return positive
    return d.allNegative(subset)
# Report the MONK-2 results; `gains` is assigned before this fragment
# (not visible here).
print("Information gains MONK-2:", gains)
print("Best attribute for split:", max(gains, key=gains.get))
print()
# Map every attribute in md.attributes to its dt.averageGain on md.monk3.
gains = dict(
    zip(md.attributes, [dt.averageGain(md.monk3, a) for a in md.attributes]))
print("Information gains MONK-3:", gains)
print("Best attribute for split:", max(gains, key=gains.get))
print()
# building tree
# Manual two-level walk over MONK-1: one branch per value of `selected`.
# NOTE(review): `selected` is assigned outside this fragment; presumably the
# attribute chosen for the root split -- confirm.
print("\n----------DECISION TREE MONK-1 DEPTH 2----------\n")
for v in selected.values:
    print(selected, "=", v)
    subset = dt.select(md.monk1, selected, v)
    if dt.allPositive(subset) or dt.allNegative(subset):
        # First-level branch is reported directly with dt.mostCommon.
        print(selected, "=", v, "->", dt.mostCommon(subset))
    else:
        # Second level: pick the attribute with the largest dt.averageGain
        # among the attributes other than `selected`.
        attributes_left = [a for a in md.attributes if a != selected]
        gains = dict(
            zip(attributes_left, [dt.averageGain(subset, a) for a in attributes_left]))
        print("Information gains:", gains)
        best = max(gains, key=gains.get)
        print("Best attribute for split:", best)
        for v2 in best.values:
            print(best, "=", v2, "->", dt.mostCommon(dt.select(subset, best, v2)))
    # NOTE(review): nesting of this blank-line print was reconstructed from
    # collapsed source; assumed to run once per value of `selected`.
    print()
# dr.drawTree(dt.buildTree(md.monk1, md.attributes, 2))
print(dt.buildTree(md.monk1, md.attributes, 2))