def init(train_path, test_path, validation_path, l, k, toprint):
    """Train a tree, then report test-set accuracy before and after pruning.

    The training file is parsed with ``util.parseLines`` and handed to
    ``id3`` to build the tree.  The tree is scored on the parsed test file
    with ``classifySet``; then ten randomised ``postpruning`` runs are
    tried and the best test-set score seen (never lower than the unpruned
    score) is reported.

    Args:
        train_path: Path passed to ``util.parseLines`` for the training data.
        test_path: Path passed to ``util.parseLines`` for the test data.
        validation_path: Path forwarded to ``postpruning``.
        l: Upper bound (inclusive) for the random first argument given to
            ``postpruning``.  Must be >= 1.
        k: Upper bound (inclusive) for the random second argument given to
            ``postpruning``.  Must be >= 1.
        toprint: The tree is printed only when this equals the string "yes".

    Raises:
        ValueError: From ``random.randint`` when ``l`` or ``k`` is below 1.
    """
    featureSet = util.parseLines(train_path)
    rootNode = id3(list(util.headers), None, featureSet)
    if toprint == "yes":
        rootNode.printTree(0)

    testSet = util.parseLines(test_path)
    accuracy1 = classifySet(rootNode, testSet)
    # Single-argument print(...) produces the same text under the Python 2
    # print statement and under the Python 3 print function.
    print("Accuracy before pruning: %s" % (accuracy1,))

    # Start from the real unpruned accuracy.  The previous seed of
    # ``accuracy1 + 0.2`` reported an invented number whenever no pruned
    # tree improved on the baseline.
    accuracy = accuracy1
    for _ in range(10):
        ll = random.randint(1, l)
        kk = random.randint(1, k)
        dbest = rootNode.postpruning(ll, kk, validation_path)
        accuracy2 = classifySet(dbest, testSet)
        # Compare with the running best so a later, weaker improvement
        # cannot overwrite an earlier, stronger one.
        # NOTE(review): the winning trial is chosen by test-set accuracy,
        # so the reported figure is optimistic -- confirm this is intended.
        if accuracy2 > accuracy:
            accuracy = accuracy2
    print("Accuracy after pruning: %s" % (accuracy,))
def postpruning(self, l, k, validation_path):
    """Return a randomly pruned copy of this tree that scores best on validation data.

    Starting from a copy of ``self``, ``l`` candidate trees are generated.
    Each candidate is a copy of the current best tree in which a random
    number (1..``k``) of randomly chosen non-leaf nodes are collapsed into
    leaves.  A candidate replaces the current best only when
    ``classifySet`` scores it strictly higher on the validation data.
    ``self`` is never modified; all pruning happens on copies.

    Args:
        l: Number of candidate trees to try.
        k: Inclusive upper bound on the number of prune steps per
            candidate.  Must be >= 1.
        validation_path: Path passed to ``util.parseLines`` for the
            validation data.

    Returns:
        The best tree found (a copy; possibly an unpruned copy of ``self``).

    Raises:
        ValueError: From ``random.randint`` when ``k`` is below 1.
    """
    # This function resets the module-level list before every call to
    # getNonLeafNodes and reads it afterwards -- presumably that helper
    # appends the non-leaf nodes to it; confirm against its definition.
    global nonleaf

    validationSet = util.parseLines(validation_path)
    dbest = copyTree(self)
    # range(l) / range(m) run exactly l and m times; the earlier
    # range(1, l) / range(1, m) silently dropped one iteration each, so
    # l == 1 or m == 1 did no work at all.
    for _ in range(l):
        ddash = copyTree(dbest)
        m = random.randint(1, k)
        for _ in range(m):
            nonleaf = []
            getNonLeafNodes(ddash)
            n = len(nonleaf)
            if n > 1:
                # Use the drawn index.  Previously p was computed and then
                # ignored in favour of nonleaf.pop(), which always pruned
                # the last collected node.  Index 0 is never drawn --
                # presumably the root; verify against getNonLeafNodes order.
                p = random.randint(1, n - 1)
                chosenNode = nonleaf[p]
                class0 = chosenNode.classSet.count(0)
                class1 = chosenNode.classSet.count(1)
                # Collapse the subtree into a leaf.
                chosenNode.childNode0 = None
                chosenNode.childNode1 = None
                # NOTE(review): a class-0 majority is labelled "Yes" --
                # confirm this matches the label encoding used elsewhere.
                chosenNode.name = "Yes" if class0 > class1 else "No"
        dbestacc = classifySet(dbest, validationSet)
        ddashacc = classifySet(ddash, validationSet)
        if ddashacc > dbestacc:
            dbest = copyTree(ddash)
    return dbest