def pruning_decision_tree_test():
    """Smoke-test reduced-error pruning on the sample pruning dataset.

    Trains a tree, prints it next to a hard-coded expected rendering, prunes
    it against the test split, and prints both again so the output can be
    diffed by eye. No assertions — purely visual verification.
    """
    # load data
    X_train, y_train, X_test, y_test = data.sample_decision_tree_pruning()
    # build the tree
    dTree = decision_tree.DecisionTree()
    dTree.train(X_train, y_train)
    # print
    print('Your decision tree:')
    Utils.print_tree(dTree)
    print('My decision tree:')
    # Expected rendering of the UNPRUNED tree (adjacent string literals are
    # implicitly concatenated into one multi-line blob).
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: 1'
        '\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tsplit by dim 2\n\t\tbranch 0->2->0{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 3 \n\t\t\t'
        'class:1\n\t\t}\n\t\tbranch 0->2->1{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 2 \n\t\t\tclass:0'
        '\n\t\t}\n\t}\n}')
    Utils.reduced_error_prunning(dTree, X_test, y_test)
    print('Your decision tree after pruning:')
    Utils.print_tree(dTree)
    print('My decision tree after pruning:')
    # Expected rendering AFTER pruning: identical except branch 0->2, whose
    # dim-2 split has been collapsed into a class:1 leaf.
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: '
        '1\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tclass:1\n\t}\n}')
def decision_tree_test(): features, labels = data.sample_decision_tree_data() # build the tree dTree = decision_tree.DecisionTree() dTree.train(features, labels) # print print('Your decision tree: ') Utils.print_tree(dTree) print('My decision tree: ') print( 'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 2 : 2 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: ' '1\n\t\tnum of samples for each class: 1 \n\t\tclass:0\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of ' 'samples for each class: 1 : 1 \n\t\tsplit by dim 0\n\t\tbranch 0->1->0{\n\t\t\tdeep: 2\n\t\t\tnum of ' 'samples for each class: 1 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->1->1{\n\t\t\tdeep: 2\n\t\t\tnum of ' 'samples for each class: 1 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of ' 'samples for each class: 1 \n\t\tclass:1\n\t}\n}') # data X_test, y_test = data.sample_decision_tree_test() # testing y_est_test = dTree.predict(X_test) print('Your estimate test: ', y_est_test) print('My estimate test: ', [0, 0, 1])
def test_big_tree():
    """Train on the full dataset, report test accuracy before and after
    reduced-error pruning, then dump the pruned tree."""
    X_train, y_train, X_test, y_test = data.load_decision_tree_data()

    tree = decision_tree.DecisionTree()
    tree.train(X_train.tolist(), y_train.tolist())

    # Accuracy of the unpruned tree on the held-out split.
    print('test_accu', accuracy_score(tree.predict(X_test), y_test))

    # Prune against the same held-out split and re-measure.
    Utils.reduced_error_prunning(tree, X_test, y_test)
    print('test_accu', accuracy_score(tree.predict(X_test), y_test))

    Utils.print_tree(tree)
def test_tree():
    """Train on the toy sample data, print the tree, and report test
    accuracy before and after reduced-error pruning."""
    features, labels = data.sample_decision_tree_data()

    tree = decision_tree.DecisionTree()
    tree.train(features, labels)
    Utils.print_tree(tree)

    X_test, y_test = data.sample_decision_tree_test()

    # Accuracy of the unpruned tree on the sample test split.
    print('test_accu', accuracy_score(tree.predict(X_test), y_test))

    # Prune against the same split and re-measure.
    Utils.reduced_error_prunning(tree, X_test, y_test)
    print('test_accu', accuracy_score(tree.predict(X_test), y_test))
scaling_classes = { 'min_max_scale': MinMaxScaler, 'normalize': NormalizationScaler, } #best_model, best_k, best_function, best_scaler = model_selection_with_transformation(distance_funcs, scaling_classes, Xtrain, ytrain, Xval, yval) import data import hw1_dt as decision_tree import utils as Utils from sklearn.metrics import accuracy_score features, labels = data.sample_decision_tree_data() # build the tree dTree = decision_tree.DecisionTree() dTree.train(features, labels) # print Utils.print_tree(dTree) # data X_test, y_test = data.sample_decision_tree_test() # testing y_est_test = dTree.predict(X_test) test_accu = accuracy_score(y_est_test, y_test) print('test_accu', test_accu) """
def _prune_to_leaf(node):
    # Collapse *node* into a leaf that predicts its majority class (cls_max).
    node.splittable = False
    node.children = []
    node.feature_uniq_split = None
    node.dim_split = None


def reduced_error_prunning(decisionTree, X_test, y_test):
    """Prune *decisionTree* in place, bottom-up, by reduced-error pruning.

    Each child subtree of the root is first pruned recursively, then replaced
    by a majority-class leaf whenever that leaf classifies the validation
    samples reaching the child at least as well as the subtree does.

    Args:
        decisionTree: a trained hw.DecisionTree; mutated in place.
        X_test: validation features (sequence of per-sample feature lists).
        y_test: validation labels aligned with X_test.

    Returns:
        None.
    """
    root = decisionTree.root_node
    if not root.splittable:
        return  # already a leaf — nothing to prune

    cut = root.dim_split
    # Partition the validation set among the children by split-feature value,
    # dropping the consumed feature so the child's dim_split indices line up
    # with the reduced samples.
    child_X, child_y = [], []
    for value in root.feature_uniq_split:
        xs, ys = [], []
        for sample, label in zip(X_test, y_test):
            if sample[cut] == value:
                reduced = list(sample)
                # BUGFIX: was reduced.remove(value), which deletes the first
                # element *equal to* the split value anywhere in the sample,
                # not the feature at dimension `cut`.
                del reduced[cut]
                xs.append(reduced)
                ys.append(label)
        child_X.append(xs)
        child_y.append(ys)

    for i, child in enumerate(root.children):
        if i >= len(child_X):
            # No validation partition exists for this child; the original
            # code pruned in this case — preserved.
            _prune_to_leaf(child)
            continue

        # Prune the child's own subtree first (bottom-up order).
        subtree = hw.DecisionTree()
        subtree.root_node = child
        reduced_error_prunning(subtree, child_X[i], child_y[i])

        predictions = subtree.predict(child_X[i])  # cached; was computed 3x
        if not (predictions and child_y[i]):
            # No validation data reaches this child: prune (original intent).
            _prune_to_leaf(child)
            continue

        correct_subtree = sum(p == t for p, t in zip(predictions, child_y[i]))
        correct_as_leaf = sum(t == child.cls_max for t in child_y[i])
        # BUGFIX: the original pruned in BOTH branches of this comparison,
        # unconditionally collapsing every child and making the computed
        # accuracies dead code. Prune only when the majority-class leaf does
        # at least as well as the subtree; otherwise keep the subtree.
        if correct_subtree <= correct_as_leaf:
            _prune_to_leaf(child)
    return