def pruningComputation(trainDataset, valDataset):
    """Build a tree on trainDataset, then prune it greedily against valDataset.

    The starting tree is dtree.buildTree(trainDataset, m.attributes).  On each
    pass every candidate returned by dtree.allPruned for the current tree is
    scored with dtree.check on valDataset; a candidate replaces the current
    tree only when its score is strictly higher than the best score so far.
    Passes repeat until one completes without any replacement.

    Returns dtree.check(tree, valDataset) for the tree that remains.
    """
    best_tree = dtree.buildTree(trainDataset, m.attributes)
    best_score = dtree.check(best_tree, valDataset)

    while True:
        improved = False
        # The candidate list is taken from the tree as it stands at the start
        # of the pass; replacements made during the pass do not alter it.
        for candidate in dtree.allPruned(best_tree):
            score = dtree.check(candidate, valDataset)
            if score > best_score:
                best_tree, best_score = candidate, score
                improved = True
        if not improved:
            break

    return dtree.check(best_tree, valDataset)
def assignment3():
    """Print the Assignment 3 report to stdout.

    Part one: for every value of m.attributes[4], select the matching subset
    of m.monk1 with dtree.select and print dtree.averageGain of that subset
    for each attribute in m.attributes, followed by the largest gain seen.

    Part two: for every entry of the module-level ``datasets`` list, build a
    tree with dtree.buildTree and print dtree.check of that tree on the
    dataset itself and on the same-index entry of ``testsets``, labelled with
    the same-index entry of ``datasetnames``.

    Nothing is returned.
    """
    # Every print below passes a single pre-formatted string so that the
    # output is the same whether print is a statement or a function.
    print("Assignment 3:")

    split_attr = m.attributes[4]  # looked up once rather than per subset
    max_val = 0
    for value in split_attr.values:
        subset = dtree.select(m.monk1, split_attr, value)
        for index, attr in enumerate(m.attributes):
            gain = dtree.averageGain(subset, attr)
            max_val = max(max_val, gain)
            # The gain is computed before anything is written, so a failure
            # cannot leave a half-finished "Attr i =" line on the terminal.
            print("Attr %s = %s" % (index, gain))
    # NOTE(review): max_val is initialised once, so this is the maximum over
    # all subsets combined -- confirm a per-subset maximum is not wanted.
    print("The highest value = %s" % (max_val,))

    for index, dataset in enumerate(datasets):
        tree = dtree.buildTree(dataset, m.attributes)
        print("%s :" % (datasetnames[index],))
        print("training set %s" % (dtree.check(tree, dataset),))
        print("Test set %s" % (dtree.check(tree, testsets[index]),))
        print("\n")
def assignment4(nIter):
    """Print the Assignment 4 report: mean pruned-tree score per fraction.

    For m.monk1 and m.monk3, and for every value in the module-level
    ``fractions``, the dataset is split nIter times with
    partition(data, fraction); each split is handed to pruningComputation and
    the mean of the returned scores is printed to stdout.

    nIter -- number of partitions averaged for each dataset/fraction pair.
             Zero raises ZeroDivisionError when the mean is taken.

    Nothing is returned.
    """
    # Every print below passes a single pre-formatted string so that the
    # output is the same whether print is a statement or a function.
    print("Assignment 4:")
    print("Averaging %s times" % (nIter,))

    # Each dataset travels with its label, so the label never has to be
    # recovered by comparing dataset contents against m.monk1.
    for label, data in (("monk1", m.monk1), ("monk3", m.monk3)):
        print(label)
        for fraction in fractions:
            total = 0
            for _ in range(nIter):
                # A fresh partition per iteration; the mean is taken over
                # several partitions of the same dataset/fraction.
                trainDataset, valDataset = partition(data, fraction)
                total += pruningComputation(trainDataset, valDataset)
            print("Fraction %s : %s" % (fraction, float(total) / nIter))
        print("")


# Script section: run the assignments in order when the file is executed.
assignment1()
assignment2()

print("ownBuildTree")
ownBuildTree(m.monk1, m.attributes)

tree = dtree.buildTree(m.monk1, m.attributes, 2)
print(tree)
# dt.drawTree(tree)  # tree drawing is left disabled

assignment3()
assignment4(10)