def pruningComputation(trainDataset, valDataset):
    """Grow a tree on trainDataset, greedily prune it, and score the result.

    A tree is built with dtree.buildTree from trainDataset and m.attributes.
    Then, in rounds, every candidate returned by dtree.allPruned for the
    current tree is scored with dtree.check against valDataset; a candidate
    replaces the current tree only when its score is strictly higher than
    the best score seen so far. Rounds repeat until a full round produces no
    replacement.

    Returns whatever dtree.check reports for the final tree on valDataset.
    """
    best_tree = dtree.buildTree(trainDataset, m.attributes)
    best_score = dtree.check(best_tree, valDataset)

    while True:
        improved = False
        # Candidates are generated from the tree as it stood at the start of
        # this round, even if best_tree is replaced while scanning them.
        for candidate in dtree.allPruned(best_tree):
            candidate_score = dtree.check(candidate, valDataset)
            if candidate_score > best_score:
                best_tree, best_score = candidate, candidate_score
                improved = True
        if not improved:
            # A whole round without a strictly better candidate: stop.
            break

    return dtree.check(best_tree, valDataset)
def assignment3(): print "Assignment 3:" max_val = 0 for each in m.attributes[4].values: newDataset = dtree.select(m.monk1, m.attributes[4], each) for idy,attr in enumerate(m.attributes): print "Attr", idy,"=", gmax = dtree.averageGain(newDataset,attr) max_val = max(max_val,gmax) print gmax print "The highest value =", max_val for idx,x in enumerate(datasets): t=dtree.buildTree(x, m.attributes) print datasetnames[idx],":" print "training set", dtree.check(t,x) print "Test set",dtree.check(t,testsets[idx]) print "\n"