def build(self):
    """Build the binary RST tree and store it on ``self.tree``.

    Reads the whole file at ``self.fname`` and passes the text through
    ``buildtree``, ``binarizetree`` and ``backprop`` in that order. The
    result of every stage is assigned to ``self.tree``, so after a failure
    in a later stage ``self.tree`` holds the last successful stage.
    """
    # Context manager: the original left the handle open until GC.
    with open(self.fname) as fin:
        text = fin.read()
    self.tree = buildtree(text)
    self.tree = binarizetree(self.tree)
    self.tree = backprop(self.tree)
def accuracyFold(dataset=None, f=None, path="./Data"):
    """Train on one fold, classify its test fold and print a report.

    Loads ``trainFold_<f>.p`` and ``testFold_<f>.p`` from
    ``<path>/<dataset>/folds/``, builds a tree from the training rows with
    ``buildtree`` and classifies every test row with ``mdclassify``. The
    last element of each row is taken as its true class. After calling
    ``createLabels(dataset=dataset)``, the labels pickled at
    ``<path>/<dataset>/labels.p`` are passed as ``target_names`` to
    ``classification_report``, whose output is printed.

    :param dataset: dataset directory name under ``path``; must be a string
        (the ``None`` default fails when the path is concatenated)
    :param f: fold number used in the pickle file names
    :param path: root data directory
    :return: fraction of test rows whose predicted class equals the true
        class (previously computed but discarded)
    :raises ZeroDivisionError: if the test fold contains no rows
    """
    foldPath = path + "/" + dataset + "/folds/"

    # ``with`` closes each pickle file; the original leaked the handles.
    with open(foldPath + "trainFold_" + str(f) + ".p", "rb") as fin:
        trainFoldData = pickle.load(fin)
    tree = buildtree(trainFoldData)

    with open(foldPath + "testFold_" + str(f) + ".p", "rb") as fin:
        testFoldData = pickle.load(fin)

    actual = []
    predicted = []
    for obs in testFoldData:
        result = mdclassify(obs, tree)
        actual.append(obs[-1])
        predicted.append(result)

    trueMatch = sum(1 for truth, guess in zip(actual, predicted)
                    if truth == guess)
    # Computed before the report, as in the original, so an empty fold
    # fails here rather than after the labels have been regenerated.
    accuracy = trueMatch / float(len(actual))

    createLabels(dataset=dataset)
    with open(path + "/" + dataset + "/labels.p", "rb") as fin:
        target_names = pickle.load(fin)
    print(classification_report(actual, predicted, target_names=target_names))
    return accuracy
def accuracy10Fold(dataset=None, path="./Data"):
    """Evaluate folds 1..10 and return the mean test accuracy.

    For each fold ``f`` the pickles ``./result/iris/iristrain<f>.p`` and
    ``./result/iris/iristest<f>.p`` are loaded, a tree is built from the
    training rows with ``buildtree`` and every test row is classified with
    ``mdclassify``. The last element of a row is its true class. The
    accuracy of each fold and the final mean are printed.

    :param dataset: only used as a label in the final printed line
    :param path: accepted for interface compatibility; not used
    :return: mean of the ten per-fold accuracies
    :raises ZeroDivisionError: if a test fold contains no rows
    """
    # NOTE(review): the fold location is hard-coded to the iris results, so
    # ``dataset`` and ``path`` do not select the data - confirm intended.
    foldPath = "./result/iris/iris"

    accuracyList = []
    for f in range(1, 11):
        # ``with`` closes each pickle file; the original leaked the handles.
        with open(foldPath + "train" + str(f) + ".p", "rb") as fin:
            trainFoldData = pickle.load(fin)
        # Plain decision tree; the pessimistic variant
        # (buildtree_pessimistic) was disabled in the original.
        tree = buildtree(trainFoldData)

        with open(foldPath + "test" + str(f) + ".p", "rb") as fin:
            testFoldData = pickle.load(fin)

        trueMatch = 0
        totalCmp = 0
        for obs in testFoldData:
            totalCmp += 1
            result = mdclassify(obs, tree)
            if obs[-1] == result:
                trueMatch += 1

        accuracy = trueMatch / float(totalCmp)
        print("Accuracy for Testing fold", f, accuracy)
        accuracyList.append(accuracy)

    finalAccuracy = sum(accuracyList) / float(10)
    print("Final Accuracy of ", dataset, finalAccuracy)
    return finalAccuracy
def build(self, strTree, vocab):
    """ Create the tree from its string description and keep its root.

    The work is delegated to ``buildtree`` (refer to buildtree.py), which
    is documented to (1) read the structure, (2) build the tree structure
    and (3) initialize the leaf nodes. Its return value is stored on
    ``self.root``.

    :type strTree: string
    :param strTree: string of the tree information

    :type vocab: Vocab instance
    :param vocab: word vocab with representation
    """
    root = buildtree(strTree, vocab)
    self.root = root
def accuracy10Fold(dataset=None, path="./Data"):
    """Evaluate folds 1..10 of a dataset and return the mean test accuracy.

    For each fold ``f`` the pickles ``trainFold_<f>.p`` and
    ``testFold_<f>.p`` are loaded from ``<path>/<dataset>/folds/``, a tree
    is built from the training rows with ``buildtree`` and every test row
    is classified with ``mdclassify``. The last element of a row is its
    true class. The accuracy of each fold and the final mean are printed.

    :param dataset: dataset directory name under ``path``; must be a string
        (the ``None`` default fails when the path is concatenated)
    :param path: root data directory
    :return: mean of the ten per-fold accuracies
    :raises ZeroDivisionError: if a test fold contains no rows
    """
    # Loop-invariant, so built once instead of on every iteration.
    foldPath = path + "/" + dataset + "/folds/"

    accuracyList = []
    for f in range(1, 11):
        # ``with`` closes each pickle file; the original leaked the handles.
        with open(foldPath + "trainFold_" + str(f) + ".p", "rb") as fin:
            trainFoldData = pickle.load(fin)
        # Unpruned decision tree; pruning was disabled in the original.
        tree = buildtree(trainFoldData)

        with open(foldPath + "testFold_" + str(f) + ".p", "rb") as fin:
            testFoldData = pickle.load(fin)

        trueMatch = 0
        totalCmp = 0
        for obs in testFoldData:
            totalCmp += 1
            result = mdclassify(obs, tree)
            if obs[-1] == result:
                trueMatch += 1

        accuracy = trueMatch / float(totalCmp)
        print("Accuracy for Testing fold", f, accuracy)
        accuracyList.append(accuracy)

    finalAccuracy = sum(accuracyList) / float(10)
    print("Final Accuracy of ", dataset, finalAccuracy)
    return finalAccuracy
def build(self):
    """Build the binary RST tree and propagate document info onto it.

    Steps, in order:

    1. read the text of ``self.fdis`` and build the RST tree from it with
       ``buildtree``;
    2. binarize the tree with ``binarizetree``;
    3. read ``self.fmerge`` with ``DocReader`` into ``self.doc``;
    4. run ``backprop`` with the binarized tree and the document.

    ``self.tree`` is assigned after steps 1, 2 and 4.

    :raises IOError: if ``self.fmerge`` is not an existing file; at that
        point ``self.tree`` already holds the binarized tree
    """
    # Context manager: the original left the handle open until GC.
    with open(self.fdis) as fin:
        text = fin.read()
    # Build RST as annotation
    self.tree = buildtree(text)
    # Binarize it
    self.tree = binarizetree(self.tree)
    # Read doc file (checked only after the tree is built, as before)
    if not isfile(self.fmerge):
        raise IOError("File doesn't exist: {}".format(self.fmerge))
    dr = DocReader()
    self.doc = dr.read(self.fmerge)
    # Prop information from doc on the binarized RST tree
    self.tree = backprop(self.tree, self.doc)
def printtree(dataset=None, path="./Data", filename="data.p"):
    """Draw the plain, pruned and pessimistic trees of a dataset as JPEGs.

    Loads the pickle at ``<path>/<dataset>/<filename>``. Its first element
    is passed to ``drawtree`` as the column names and the remaining
    elements are the rows the trees are built from. Images are written
    into ``<path>/<dataset>/``:

    * ``treeview.jpg`` - tree from ``buildtree`` (see the note at the end)
    * ``treeview_prun_mdl.jpg`` - result of ``prune(tree)``
    * ``treeview_pes.jpg`` - tree from ``buildtree_pessimistic``

    :param dataset: dataset directory name under ``path``; must be a string
        (the ``None`` default fails when the path is concatenated)
    :param path: root data directory
    :param filename: name of the pickled data file
    """
    base = path + "/" + dataset + "/"
    # ``with`` closes the pickle file; the original leaked the handle.
    with open(base + filename, "rb") as fin:
        data = pickle.load(fin)

    writepath = base + "treeview.jpg"
    writepath_pes = base + "treeview_pes.jpg"
    writepath_prun = base + "treeview_prun_mdl.jpg"
    colname = data[0]
    rows = data[1:]

    tree = buildtree(rows=rows)
    drawtree(tree=tree, jpeg=writepath, colname=colname)

    tree_prun = prune(tree)
    drawtree(tree=tree_prun, jpeg=writepath_prun, colname=colname)

    tree_pes = buildtree_pessimistic(rows=rows)
    drawtree(tree=tree_pes, jpeg=writepath_pes, colname=colname)
    # A second drawtree of tree_pes followed here with ``colname=colname``,
    # a name whose only assignment was commented out, so it raised
    # NameError. It duplicated the call above and has been removed.

    # NOTE(review): this call was unreachable before the NameError fix. It
    # overwrites treeview.jpg with the tree pruned at 0.3 - confirm that
    # this is intended, or give it its own output file.
    drawtree(tree=prune(tree, 0.3), jpeg=writepath, colname=colname)
def accuracy(filename):
    """Return the mean test accuracy over folds 1..10, as a percentage.

    For each fold ``i`` the pickles ``./result/<filename>/<filename>train<i>.p``
    and ``./result/<filename>/<filename>test<i>.p`` are loaded, a tree is
    built from the training rows with ``buildtree`` and every test row is
    classified with ``mdclassify``. The last element of a row is its true
    class. The final percentage is printed before it is returned.

    :param filename: dataset name, used both as directory and file prefix
    :return: mean per-fold accuracy multiplied by 100
    :raises ZeroDivisionError: if a test fold contains no rows
    """
    # Loop-invariant, so built once instead of on every iteration.
    subpath = "./result/" + filename + "/" + filename

    accuracyList = []
    for iteration in range(1, 11):
        # ``with`` closes each pickle file; the original leaked the handles.
        with open(subpath + "train" + str(iteration) + ".p", "rb") as fin:
            train_data = pickle.load(fin)
        tree = buildtree(train_data)

        with open(subpath + "test" + str(iteration) + ".p", "rb") as fin:
            test_data = pickle.load(fin)

        correct = 0
        comparison = 0
        for each in test_data:
            comparison += 1
            predicted_class = mdclassify(each, tree)
            if each[-1] == predicted_class:
                correct += 1
        accuracyList.append(correct / float(comparison))

    # Named differently from the function to avoid shadowing it locally.
    mean_accuracy = sum(accuracyList) / float(10)
    print("Final Accuracy:" + str(mean_accuracy * 100))
    return mean_accuracy * 100
def _run_dataset(name):
    """Build, draw, cross-validate and prune the tree for one dataset.

    Reads ``./data/<name>/header.p`` and ``./data/<name>/data.p``, builds a
    tree from the data with ``buildtree``, prints and draws it to
    ``./result/<name>/tree-<name>.jpg``, generates folds with ``genfolds``,
    reports accuracy with ``accuracy`` and finally draws the pruned tree to
    ``./result/<name>/<name>.jpg``. The progress markers printed between
    the steps are kept exactly as they were.
    """
    print("header")
    # Pickle files must be opened in binary mode: the original used text
    # mode, which pickle.load rejects on Python 3. ``with`` also closes
    # the handles, which the original never did.
    with open("./data/" + name + "/header.p", "rb") as header_file:
        # The header is loaded but not used further; the read is kept so a
        # missing or corrupt header file still fails at this point.
        pickle.load(header_file)
    print(name)
    with open("./data/" + name + "/data.p", "rb") as data_file:
        data = pickle.load(data_file)
    print("header3")
    # NOTE(review): the tree is built from all of ``data`` while
    # ``data[0]`` is also passed as the column names - confirm whether the
    # first row should be excluded from training.
    tree = buildtree(data)
    print("header44")
    printtree(tree)
    print("header45")
    drawtree(tree=tree, jpeg="./result/" + name + "/tree-" + name + ".jpg",
             colname=data[0])
    print("header5")
    genfolds("./result/", name, "./result/")
    accuracy(name)
    tree = prune(tree)
    drawtree(tree=tree, jpeg="./result/" + name + "/" + name + ".jpg",
             colname=data[0])
    print("header")


def main():
    """Run the decision-tree pipeline on the wine dataset.

    Earlier runs for the iris, banknote, car and haberman datasets existed
    here only as commented-out copies of the same code. They were removed
    in favour of ``_run_dataset``; change the dataset name to repeat a run.
    """
    _run_dataset("wine")