Esempio n. 1
0
 def _apply_bagging_loop(self, data):
     """Apply every bagged tree to ``data`` and collect the predictions.

     For each of the ``self.T`` entries of ``self.treesInit`` an
     ``applyTree`` object is built, run through ``apply_ID3`` and its
     ``predict`` attribute is stored.

     Parameters
     ----------
     :data: data set handed to ``applyTree`` for every tree

     Returns
     -------
     :predicts: list with one ``predict`` entry per tree, in tree order
     """
     # Report progress roughly every 10% of the trees.  np.round(T / 10) is
     # 0 for T <= 5, which made the modulo below evaluate to NaN (never
     # equal to 0), so progress was never reported; clamp the step to 1.
     step = max(1, int(np.round(self.T / 10)))

     predicts = []
     for t in range(self.T):
         if t % step == 0:
             self._bag_progress(t)
         applyInit = applyTree(data,
                               self.treesInit[t],
                               numerical=self.numerical)
         # apply_ID3 is called for its effect on applyInit; the predictions
         # are read back from the ``predict`` attribute afterwards.
         apply_ID3(applyInit)
         predicts.append(applyInit.predict)
     if self.verbose:
         print('100% done.\n')
     return predicts
def get_predictions(bagInit, test, key):
    """Return the bagged ensemble vote and the first tree's predictions.

    Parameters
    ----------
    :bagInit: bagging object; its ``_apply_bagging_loop``, ``treesInit`` and
        ``alpha`` members are used
    :test: data set the trees are applied to
    :key: mapping whose ``get`` converts raw predictions to numbers
        (presumably +1 / -1 -- confirm against the caller)

    Returns
    -------
    :H_bag: +1 where the alpha-weighted sum of the mapped tree predictions
        is positive, -1 otherwise
    :h_tree: predictions of ``bagInit.treesInit[0]`` mapped through ``key``
    """
    # Raw predictions of all trees, one row per tree.
    ensemble_raw = np.array(bagInit._apply_bagging_loop(test))

    # Raw predictions of the first tree on its own.
    first_tree = applyTree(test, bagInit.treesInit[0], numerical=True)
    apply_ID3(first_tree)
    single_raw = np.array(first_tree.predict)

    to_numeric = np.vectorize(key.get)

    # Transpose so that each row holds the votes of all trees for one sample.
    votes = to_numeric(ensemble_raw).T
    weighted_votes = np.array(bagInit.alpha) * votes
    is_positive = weighted_votes.sum(axis=1) > 0
    # Map the boolean vote True/False onto +1/-1.
    H_bag = is_positive * 2 - 1

    h_tree = to_numeric(single_raw)
    return H_bag, h_tree
    def _apply_AdaBoost(self, data):
        """Apply every boosted learner to ``data`` and collect predictions.

        For each of the ``self.T`` entries of ``self.learners_init`` an
        ``applyTree`` object is built (using the weights stored on that
        learner), run through ``apply_ID3`` and its ``predict`` attribute
        is stored.

        Parameters
        ----------
        :data: data set handed to ``applyTree`` for every learner

        Returns
        -------
        :predicts: list with one ``predict`` entry per learner, in order
        """
        # Report progress roughly every 10% of the learners.
        # np.round(T / 10) is 0 for T <= 5, which made the modulo below
        # evaluate to NaN (never equal to 0), so progress was never
        # reported; clamp the step to at least 1.
        step = max(1, int(np.round(self.T / 10)))

        predicts = []
        for t in range(self.T):
            if t % step == 0:
                self._progress(t)
            tree_init = self.learners_init[t]

            applyInit = applyTree(data,
                                  tree_init,
                                  weights=tree_init.weights,
                                  numerical=True)
            # apply_ID3 is called for its effect on applyInit; the
            # predictions are read back from ``predict`` afterwards.
            apply_ID3(applyInit)
            predicts.append(applyInit.predict)
        print('Done applying \n')
        return predicts
    def _calc_vote(self, stump_init, t, D, numerical=False):
        """Evaluate the stump of round ``t`` and store its vote ``alpha[t]``.

        The stump is applied to ``self.data`` with sample weights ``D``.
        The error reported by ``apply_ID3`` is stored in ``self.errs_w[t]``,
        ``1 - mean(h_t)`` is stored in ``self.errs[t]`` and the vote
        ``0.5 * log((1 - err) / err)`` is stored in ``self.alpha[t]``.

        Parameters
        ----------
        :stump_init: learner handed to ``applyTree``
        :t: index of the boosting round
        :D: sample weights handed to ``applyTree``
        :numerical: forwarded to ``applyTree``

        Returns
        -------
        :h_t: first element of the pair returned by ``apply_ID3``
            (presumably per-sample correctness indicators -- confirm
            against ``apply_ID3``)
        """
        err_init = applyTree(self.data,
                             stump_init,
                             weights=D,
                             numerical=numerical)
        h_t, total_err = apply_ID3(err_init)

        if total_err > 0.5:
            print(f'Total error was {total_err}, which is greater than 50%')
        self.errs_w[t] = total_err
        self.errs[t] = 1 - sum(h_t) / len(h_t)

        # A perfect stump (error 0) would divide by zero and an always-wrong
        # one (error 1) would take log(0); keep the error strictly inside
        # (0, 1) so the vote stays finite.  The raw error is what is
        # recorded in errs_w above.
        eps = np.finfo(float).eps
        bounded_err = min(max(total_err, eps), 1 - eps)
        self.alpha[t] = 0.5 * np.log((1 - bounded_err) / bounded_err)

        return h_t
    def _applyAndError(self, dt, test, treeInit, numerical=False):
        """Apply a tree and return the total error reported by ``apply_ID3``.

        Parameters
        ----------
        :dt: first positional argument forwarded to ``applyTree``
            (presumably the trained tree -- confirm against ``applyTree``)
        :test: second positional argument forwarded to ``applyTree``
            (presumably the data to evaluate on)
        :treeInit: third positional argument forwarded to ``applyTree``
        :numerical: forwarded to ``applyTree`` as ``numerical``

        Returns
        -------
        :err: second element of the pair returned by ``apply_ID3``
        """
        applied = applyTree(dt, test, treeInit, numerical=numerical)
        # Only the aggregate error is needed; the first element is dropped.
        _unused, total_error = apply_ID3(applied)
        return total_error
Esempio n. 6
0
# Load the car train/test splits; the column names come from `cols`, which is
# defined earlier in the file.
train0 = pd.read_csv('car/train.csv', names=cols)
test0 = pd.read_csv('car/test.csv', names=cols)

# Split each frame into attributes (every column but the last) and labels
# (the last column), as NumPy arrays.
attrTrain0 = np.array(train0.iloc[:, :-1])
attrTest0 = np.array(test0.iloc[:, :-1])
attrNames0 = cols[:-1]
labelsTrain0 = np.array(train0.iloc[:, -1])
labelsTest0 = np.array(test0.iloc[:, -1])

# %% training the ID3 algo for testing
# Build a single tree on the training frame with the 'entropy' method.
carTreeInit = decisionTree(train0, method='entropy')
carTree = run_ID3(carTreeInit)

# %% applying the ID3 algo for testing
# Apply the trained tree to the test frame; apply_ID3 returns a pair that is
# unpacked into errs0 and total_err0.
car_errinit = applyTree(carTree, test0, carTreeInit)
errs0, total_err0 = apply_ID3(car_errinit)

# %% making trees
# Time the tester run over all three methods.
tic = time.perf_counter()
methods = ['entropy', 'ME', 'gini']
# NOTE(review): datTrain0 and datTest0 are not used in the visible part of
# the script -- confirm whether later cells need them.
datTrain0 = [attrTrain0, labelsTrain0, train0]
datTest0 = [attrTest0, labelsTest0, test0]
dfs = [train0, test0]
# The depth passed to the tester is the number of attribute columns.
depths0 = len(attrNames0)

errinit = tester(methods, dfs, depths=depths0)
# Called through the class with the instance passed explicitly.
train_err_car, test_err_car = tester.test(errinit)
toc = time.perf_counter()
print('Time for car code is {:0.4f} seconds.'.format(toc - tic))

# %% plotting results and calc avgs
    'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
    'previous', 'poutcome', 'y'
]
# Load the bank train/test splits with the column names listed in `cols`.
train = pd.read_csv('bank/train.csv', names=cols)
test = pd.read_csv('bank/test.csv', names=cols)
# replace_unk is given copies, so `train` and `test` are not the objects it
# receives (presumably it substitutes 'unknown' entries -- confirm against
# its definition).
train_no_unk = replace_unk(train.copy())
test_no_unk = replace_unk(test.copy())

# %% training the ID3 algo for testing
tic = time.perf_counter()
# Build a single tree on the training frame with numerical=True.
bankTreeInit = decisionTree(train, numerical=True)
bankTree = run_ID3(bankTreeInit)

# % applying the ID3 algo for testing
# The tree is applied to the training frame here, not to `test`.
errinit = applyTree(bankTree, train, bankTreeInit, numerical=True)
errs, total_err = apply_ID3(errinit)
toc = time.perf_counter()
print('Time for bank code is {:0.4f} seconds.'.format(toc - tic))

# %% making trees
# NOTE(review): tic is restarted here but no matching toc is visible in this
# part of the script.
tic = time.perf_counter()
methods = ['entropy', 'ME', 'gini']
# The depth passed to the tester is the number of columns minus one.
depths = len(train.columns) - 1
dfs = [train, test]

# errinit is rebound from the applyTree object above to a tester instance.
errinit = tester(methods, dfs, depths=depths, numerical=True)
# Called through the class with the instance passed explicitly.
train_err_bank, test_err_bank = tester.test(errinit)

# % testing for replaced unknown values
# Same tester configuration on the frames returned by replace_unk.
dfs2 = [train_no_unk, test_no_unk]
errinit2 = tester(methods, dfs2, depths=depths, numerical=True)
Esempio n. 8
0
 def _calc_vote(self, tree_init, t, numerical=False):
     err_init = applyTree(self.data, tree_init, numerical=numerical)
     h_t, total_err = apply_ID3(err_init)
     self.errs[t] = total_err
     self.alpha[t] = 0.5 * np.log((1 - total_err) / total_err)