def ownBuildTree(dataset, attributes, max_depth=2, level=1):
    """Print a trace of a depth-limited, recursive decision-tree split.

    At every level the best attribute (as chosen by ``dtree.bestAttribute``)
    is used to partition ``dataset``; the average gain of every attribute in
    ``attributes`` is printed for each partition, and the function then
    recurses into each partition with the chosen attribute removed.

    Args:
        dataset: Samples to split; handed unchanged to the ``dtree`` helpers.
        attributes: Candidate attributes for this level.
        max_depth: Number of levels still allowed; nothing is printed once
            it drops below 1.
        level: Depth of the current node, used only in the printed trace.

    Returns:
        None. All output goes to stdout.
    """
    if max_depth < 1:
        return

    # NOTE(review): this runs before the empty-dataset check below and with
    # whatever attributes remain -- confirm dtree.bestAttribute copes with
    # empty inputs.
    bestAttr = dtree.bestAttribute(dataset, attributes)

    # A node is a leaf when there is nothing left to split or the samples
    # already agree on the class.
    leaf_note = None
    if not dataset:
        # Reached through the dtree module like every other helper here
        # (the bare name ``mostCommon`` is not defined in this function).
        leaf_note = "Most common {0}".format(dtree.mostCommon(dataset))
    elif dtree.allPositive(dataset):
        leaf_note = "All true"
    elif dtree.allNegative(dataset):
        leaf_note = "all false"

    if leaf_note is not None:
        print("Leaf Node: level {0} Best attribute {1} {2}".format(
            level, bestAttr, leaf_note))
        return

    # One subset per value of the chosen attribute.
    subsets = [dtree.select(dataset, bestAttr, value)
               for value in bestAttr.values]

    for idx, subset in enumerate(subsets):
        print("set {0} in {1} best attribute {2}".format(idx, level, bestAttr))
        for attr in attributes:
            print("attr {0} : {1}".format(
                attr, dtree.averageGain(subset, attr)))

    # The chosen attribute is not offered again further down the tree.
    attributesLeft = [attr for attr in attributes if attr != bestAttr]
    for subset in subsets:
        ownBuildTree(subset, attributesLeft, max_depth - 1, level + 1)

    print("\n")
def ownTreeBuilder(dataset, attributes):
    """Print the average gain of every attribute on each subset of a split.

    ``dataset`` is partitioned on ``attributes`` -- which, despite the plural
    name, is used as a single attribute object: its ``.values`` are iterated
    and it is passed to ``dtree.select``. For every resulting subset the
    average gain of each entry in ``m.attributes`` is printed, followed by
    the largest gain seen overall.

    Args:
        dataset: Samples to partition.
        attributes: The single attribute to split on.

    Returns:
        None. All output goes to stdout.
    """
    highest = 0
    for value in attributes.values:
        subset = dtree.select(dataset, attributes, value)
        for position, candidate in enumerate(m.attributes):
            gain = dtree.averageGain(subset, candidate)
            print("Attr {0} = {1}".format(position, gain))
            # Keep the running maximum across all subsets and attributes.
            if gain > highest:
                highest = gain
    print("The highest value = {0}".format(highest))
def assignment2():
    """Print the average gain of every attribute for each dataset.

    Iterates the module-level ``datasets`` and, for each one, prints the
    average gain of every entry in ``m.attributes`` followed by the largest
    gain found for that dataset.

    Returns:
        None. All output goes to stdout.
    """
    print("Assignment 2:")
    for number, data in enumerate(datasets):
        print("Dataset {0} :".format(number))
        top_gain = 0  # reset for every dataset
        for position, attribute in enumerate(m.attributes):
            gain = dtree.averageGain(data, attribute)
            print("Attr {0} = {1}".format(position, gain))
            if gain > top_gain:
                top_gain = gain
        print("The highest value = {0}".format(top_gain))
    print("\n")
def assignment3():
    """Print second-level gains for monk1 and tree accuracy per dataset.

    First splits ``m.monk1`` on ``m.attributes[4]`` and prints the average
    gain of every attribute on each resulting subset. Then builds a tree for
    every entry in the module-level ``datasets`` and prints the result of
    ``dtree.check`` on that dataset and on the matching entry of
    ``testsets``.

    Returns:
        None. All output goes to stdout.
    """
    print("Assignment 3:")

    # This part used to be an inline copy of ownTreeBuilder; delegate so the
    # two cannot drift apart.
    ownTreeBuilder(m.monk1, m.attributes[4])

    for idx, training in enumerate(datasets):
        tree = dtree.buildTree(training, m.attributes)
        print("{0} :".format(datasetnames[idx]))
        print("training set {0}".format(dtree.check(tree, training)))
        print("Test set {0}".format(dtree.check(tree, testsets[idx])))
    print("\n")