def _apply_bagging_loop(self, data):
    """Apply every bagged tree to ``data`` and collect the predictions.

    Parameters
    ----------
    data :
        Dataset passed unchanged to ``applyTree`` for each of the
        ``self.T`` trees stored in ``self.treesInit``.

    Returns
    -------
    list
        ``self.T`` entries; entry ``t`` is the ``predict`` attribute read
        from the ``applyTree`` object after ``apply_ID3`` has run on it.
    """
    # Report progress about every 10% of the ensemble.  The interval is
    # clamped to at least 1: for T <= 5, np.round(T / 10) is 0 and
    # ``t % 0`` evaluates to NaN, so progress was never reported.
    step = max(1, int(np.round(self.T / 10)))

    predicts = []
    for t in range(self.T):
        if t % step == 0:
            self._bag_progress(t)
        applyInit = applyTree(data, self.treesInit[t], numerical=self.numerical)
        apply_ID3(applyInit)
        predicts.append(applyInit.predict)

    if self.verbose:
        print('100% done.\n')
    return predicts
def get_predictions(bagInit, test, key):
    """Return the bagged ensemble vote and the first tree's predictions.

    Parameters
    ----------
    bagInit :
        Bagging object providing ``_apply_bagging_loop``, ``treesInit``
        and ``alpha``.
    test :
        Dataset the trees are applied to.
    key :
        Mapping whose ``get`` translates raw predictions to numeric votes
        (presumably +1 / -1, since the vote is thresholded at 0 -- confirm
        against the caller).

    Returns
    -------
    H_bag : numpy.ndarray
        +1 where the alpha-weighted sum of votes is positive, else -1.
    h_tree : numpy.ndarray
        Predictions of ``bagInit.treesInit[0]`` alone, mapped through
        ``key``.
    """
    # Run the whole ensemble first, then the single reference tree.
    ensemble_raw = np.array(bagInit._apply_bagging_loop(test))

    first_tree = applyTree(test, bagInit.treesInit[0], numerical=True)
    apply_ID3(first_tree)
    single_raw = np.array(first_tree.predict)

    to_vote = np.vectorize(key.get)

    # Transpose so the tree axis is last and lines up with ``alpha``
    # (assumes the loop yields one row of predictions per tree).
    votes = to_vote(ensemble_raw).T
    weighted_votes = np.array(bagInit.alpha) * votes
    is_positive = weighted_votes.sum(axis=1) > 0
    H_bag = is_positive * 2 - 1

    h_tree = to_vote(single_raw)
    return H_bag, h_tree
def _apply_AdaBoost(self, data):
    """Apply every boosted learner to ``data`` and collect the predictions.

    Parameters
    ----------
    data :
        Dataset passed unchanged to ``applyTree`` for each of the
        ``self.T`` learners stored in ``self.learners_init``.

    Returns
    -------
    list
        ``self.T`` entries; entry ``t`` is the ``predict`` attribute read
        from the ``applyTree`` object after ``apply_ID3`` has run on it.
    """
    # Report progress about every 10% of the learners.  The interval is
    # clamped to at least 1: for T <= 5, np.round(T / 10) is 0 and
    # ``t % 0`` evaluates to NaN, so progress was never reported.
    step = max(1, int(np.round(self.T / 10)))

    predicts = []
    for t in range(self.T):
        if t % step == 0:
            self._progress(t)
        tree_init = self.learners_init[t]
        # Each learner is applied with the weights stored on it.
        applyInit = applyTree(data, tree_init, weights=tree_init.weights,
                              numerical=True)
        apply_ID3(applyInit)
        predicts.append(applyInit.predict)

    print('Done applying \n')
    return predicts
def _calc_vote(self, stump_init, t, D, numerical=False):
    """Score stump ``t`` under weights ``D`` and store its vote.

    Parameters
    ----------
    stump_init :
        Learner passed to ``applyTree`` together with ``self.data``.
    t : int
        Index of the boosting round; selects the slot written in
        ``self.errs_w``, ``self.errs`` and ``self.alpha``.
    D :
        Sample weights forwarded to ``applyTree`` as ``weights``.
    numerical : bool, optional
        Forwarded to ``applyTree``.

    Returns
    -------
    h_t
        First value returned by ``apply_ID3``; presumably a per-sample
        correctness indicator -- confirm against ``apply_ID3``.
    """
    err_init = applyTree(self.data, stump_init, weights=D, numerical=numerical)
    h_t, total_err = apply_ID3(err_init)

    if total_err > 0.5:
        print(f'Total error was {total_err}, which is greater than 50%')

    self.errs_w[t] = total_err
    # One minus the mean of h_t.
    self.errs[t] = 1 - sum(h_t) / len(h_t)

    # alpha = 0.5 * ln((1 - err) / err).  Clip the error away from 0 and 1
    # so a perfect stump does not divide by zero (infinite alpha, or a
    # ZeroDivisionError for a plain Python float) and an always-wrong stump
    # does not yield log(0).  Errors strictly inside (0, 1) are unchanged.
    eps = np.finfo(float).eps
    bounded_err = np.clip(total_err, eps, 1.0 - eps)
    self.alpha[t] = 0.5 * np.log((1 - bounded_err) / bounded_err)
    return h_t
def _applyAndError(self, dt, test, treeInit, numerical=False):
    """Apply a tree through ``applyTree`` / ``apply_ID3`` and return its score.

    Parameters
    ----------
    dt :
        First positional argument of ``applyTree`` (described as the
        decision-tree object in the earlier documentation).
    test :
        Second positional argument of ``applyTree``.
    treeInit :
        Third positional argument of ``applyTree``.
    numerical : bool, optional
        Forwarded to ``applyTree``.

    Returns
    -------
    err
        Second value returned by ``apply_ID3``.  NOTE(review): the earlier
        docstring called this both "total error" and "total accuracy" --
        confirm which one ``apply_ID3`` reports.
    """
    applied = applyTree(dt, test, treeInit, numerical=numerical)
    _per_sample, total = apply_ID3(applied)
    return total
# Load the car data set; column names come from ``cols``.
train0 = pd.read_csv('car/train.csv', names=cols)
test0 = pd.read_csv('car/test.csv', names=cols)

# Every column but the last is an attribute; the last column is the label.
attrNames0 = cols[:-1]
attrTrain0, labelsTrain0 = (
    np.array(train0.iloc[:, :-1]),
    np.array(train0.iloc[:, -1]),
)
attrTest0, labelsTest0 = (
    np.array(test0.iloc[:, :-1]),
    np.array(test0.iloc[:, -1]),
)

# %% fit a single entropy-based ID3 tree as a smoke test
carTreeInit = decisionTree(train0, method='entropy')
carTree = run_ID3(carTreeInit)

# %% apply that tree to the test split
car_errinit = applyTree(carTree, test0, carTreeInit)
errs0, total_err0 = apply_ID3(car_errinit)

# %% run the tester over every splitting method (timed)
tic = time.perf_counter()

methods = ['entropy', 'ME', 'gini']
dfs = [train0, test0]
datTrain0 = [attrTrain0, labelsTrain0, train0]
datTest0 = [attrTest0, labelsTest0, test0]
depths0 = len(attrNames0)  # one depth level per attribute

errinit = tester(methods, dfs, depths=depths0)
train_err_car, test_err_car = tester.test(errinit)

toc = time.perf_counter()
print('Time for car code is {:0.4f} seconds.'.format(toc - tic))

# %% plotting results and calc avgs
'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'y' ] train = pd.read_csv('bank/train.csv', names=cols) test = pd.read_csv('bank/test.csv', names=cols) train_no_unk = replace_unk(train.copy()) test_no_unk = replace_unk(test.copy()) # %% training the ID3 algo for testing tic = time.perf_counter() bankTreeInit = decisionTree(train, numerical=True) bankTree = run_ID3(bankTreeInit) # % applying the ID3 algo for testing errinit = applyTree(bankTree, train, bankTreeInit, numerical=True) errs, total_err = apply_ID3(errinit) toc = time.perf_counter() print('Time for bank code is {:0.4f} seconds.'.format(toc - tic)) # %% making trees tic = time.perf_counter() methods = ['entropy', 'ME', 'gini'] depths = len(train.columns) - 1 dfs = [train, test] errinit = tester(methods, dfs, depths=depths, numerical=True) train_err_bank, test_err_bank = tester.test(errinit) # % testing for replaced unknown values dfs2 = [train_no_unk, test_no_unk] errinit2 = tester(methods, dfs2, depths=depths, numerical=True)
def _calc_vote(self, tree_init, t, numerical=False):
    """Score tree ``t`` on ``self.data`` and store its error and vote.

    Parameters
    ----------
    tree_init :
        Learner passed to ``applyTree`` together with ``self.data``.
    t : int
        Index of the tree; selects the slot written in ``self.errs`` and
        ``self.alpha``.
    numerical : bool, optional
        Forwarded to ``applyTree``.

    Returns
    -------
    None
        Results are stored on the instance only.
    """
    err_init = applyTree(self.data, tree_init, numerical=numerical)
    _, total_err = apply_ID3(err_init)
    self.errs[t] = total_err

    # alpha = 0.5 * ln((1 - err) / err).  Clip the error away from 0 and 1
    # so a tree that fits the data perfectly does not divide by zero
    # (infinite alpha, or a ZeroDivisionError for a plain Python float) and
    # an always-wrong tree does not yield log(0).  Errors strictly inside
    # (0, 1) are unchanged.
    eps = np.finfo(float).eps
    bounded_err = np.clip(total_err, eps, 1.0 - eps)
    self.alpha[t] = 0.5 * np.log((1 - bounded_err) / bounded_err)