Esempio n. 1
0
    def train(self, max_depth=3):
        """Run ``self.T`` boosting rounds, printing test metrics after each one.

        Every round:

        1. Fits ``self.k`` weak learners from ``self.model``. Learner ``j`` is
           trained on ``self.X`` against the ``j``-th length-``self.m`` slice
           of ``self.y``, weighted by the matching slice of ``self.D_t``, and
           is appended to ``self.h``.
        2. Computes ``r_t`` as the ``D_t``-weighted sum of ``y * prediction``
           and derives ``self.alpha_t`` from it (appended to ``self.alphas``).
        3. Re-weights and re-normalises ``self.D_t``.
        4. Predicts on ``self.test_X`` with the first ``i + 1`` rounds and
           prints ``i, precision, recall, error, one_error``.

        Args:
            max_depth: Forwarded unchanged to ``self.model(max_depth=...)``.

        NOTE(review): ``self.k`` is presumably the number of class labels
        (it is the length of the indicator vectors indexed through
        ``self.class_list``) -- confirm against the constructor.
        """
        # Only the last `at_n` entries of each predicted label list are turned
        # into positive predictions for the precision/recall evaluation.
        at_n = 3

        for i in range(self.T):
            pred_y = np.array([])  # k*m once every learner has predicted
            # Train the weak learners using the current distribution D_t.
            for j in range(self.k):
                block = slice(j * self.m, (j + 1) * self.m)
                # NOTE(review): `presort` is rejected by recent scikit-learn
                # tree estimators -- confirm what `self.model` is.
                self.h_t = self.model(max_depth=max_depth, presort=True)
                self.h_t.fit(self.X,
                             self.y[block],
                             sample_weight=self.D_t[block])
                self.h.append(self.h_t)
                pred_y = np.append(pred_y, self.h_t.predict(self.X))

            # Per-entry agreement between target and prediction; reused for
            # both r_t and the distribution update.
            margins = np.multiply(self.y, pred_y)
            r_t = np.dot(self.D_t, margins)

            # Choose alpha_t. At |r_t| == 1 the plain ratio is either a
            # division by zero (r_t == 1) or log(0) (r_t == -1), so both
            # endpoints use the smoothed form.
            if abs(abs(r_t) - 1) < 0.00000001:
                self.alpha_t = 0.5 * log(
                    (1 + r_t + 0.000001) / (1 - r_t + 0.000001))
            else:
                self.alpha_t = 0.5 * log((1 + r_t) / (1 - r_t))

            self.alphas = np.append(self.alphas, self.alpha_t)

            # Update the distribution and renormalise it to sum to 1.
            self.D_t = np.multiply(self.D_t,
                                   np.exp(-self.alpha_t * margins))
            self.D_t /= np.sum(self.D_t)

            ret_index = self.predict(self.test_X, i + 1)
            # one_error must see the raw predictions, before they are
            # overwritten with indicator vectors below.
            scores = one_error(ret_index, self.test_y)

            y_true = []
            for j, ranking in enumerate(ret_index):
                predicted = [0] * self.k
                for pos, label in enumerate(ranking):
                    if pos < self.k - at_n:
                        continue
                    predicted[self.class_list.index(label)] = 1
                ret_index[j] = predicted

                actual = [0] * self.k
                for label in self.test_y[j]:
                    actual[self.class_list.index(label)] = 1
                y_true.append(actual)

            precision, recall, error = evaluate(np.array(ret_index),
                                                np.array(y_true))
            print(i, precision, recall, error, scores)
Esempio n. 2
0
def SDSS():
    """Evaluate ``discrete_Adaboost_MH`` on the SDSS CSV data set.

    Loads ``data/SDSS.csv``, holds out 40% of the rows as a test split
    (``random_state=42``), builds the model with ``T=50``, prints every test
    prediction followed by its true label, and finally prints the one-error
    score computed with ``all_=True``.

    NOTE(review): no explicit training call is made here; presumably the
    ``discrete_Adaboost_MH`` constructor trains the model -- confirm.
    """
    features, labels, class_list = load_data('data/SDSS.csv')
    split = train_test_split(features,
                             labels,
                             test_size=0.4,
                             random_state=42)
    X_train, X_test, y_train, y_test = split

    booster = discrete_Adaboost_MH(X_train, y_train, class_list, T=50)
    ret_index = booster.predict(X_test)

    # Dump each prediction next to the ground truth for manual inspection.
    n_test = len(X_test)
    for row in range(n_test):
        print(ret_index[row])
        print(y_test[row])

    scores = one_error(ret_index, y_test, all_=True)
    print('----------SDSS one error-----------\n', scores)
Esempio n. 3
0
def yeast():
    """Evaluate ``discrete_Adaboost_MR`` on the yeast data set.

    Loads the pre-split data from ``data/yeast``, builds the model with
    ``T=100`` (passing it both the train and test splits), predicts on the
    test split using all ``clf.T`` rounds, and prints the one-error score.

    NOTE(review): no explicit training call is made here; presumably the
    ``discrete_Adaboost_MR`` constructor trains the model -- confirm.
    """
    X_train, y_train, X_test, y_test, class_list = load_data('data/yeast')

    ranker = discrete_Adaboost_MR(X_train,
                                  y_train,
                                  X_test,
                                  y_test,
                                  class_list,
                                  T=100)

    # predict() returns a pair here; only the first element is scored.
    ret_index, _ = ranker.predict(X_test, ranker.T)

    scores = one_error(ret_index, y_test)
    print('----------yeast one error on test-----------\n', scores)
Esempio n. 4
0
def yeast():
    """Evaluate ``discrete_Adaboost_MH`` on the yeast data set.

    Loads the pre-split data from ``data/yeast``, builds the model with
    ``T=200`` (passing it both the train and test splits), predicts on the
    test split using all ``clf.T`` rounds, prints every prediction followed
    by its true label, and finally prints the one-error score computed with
    ``all_=True``.

    NOTE(review): no explicit training call is made here; presumably the
    ``discrete_Adaboost_MH`` constructor trains the model -- confirm.
    """
    X_train, y_train, X_test, y_test, class_list = load_data('data/yeast')

    booster = discrete_Adaboost_MH(X_train,
                                   y_train,
                                   X_test,
                                   y_test,
                                   class_list,
                                   T=200)
    ret_index = booster.predict(X_test, booster.T)

    # Dump each prediction next to the ground truth for manual inspection.
    n_test = len(X_test)
    for row in range(n_test):
        print(ret_index[row])
        print(y_test[row])

    scores = one_error(ret_index, y_test, all_=True)
    print('----------yeast one error-----------\n', scores)
Esempio n. 5
0
    def train(self, max_depth=3):
        """Run ``self.T`` boosting rounds, printing test metrics after each one.

        ``self.D_t`` is indexed as ``D_t[x_i][l0][l1]`` and ``self.pairs``
        holds the ``(x_i, l0, l1)`` triples that take part in the weighting.
        Predictions are stored label-major: the prediction for example
        ``x_i`` under label ``l`` is ``pred_y[l * self.m + x_i]``.

        Every round:

        1. Fits ``self.k`` weak learners from ``self.model``. Learner ``j`` is
           trained on ``self.X`` against the ``j``-th length-``self.m`` slice
           of ``self.y``, weighted by column ``j`` of ``D_t`` summed over
           axis 1, and is appended to ``self.h``.
        2. Computes ``r_t`` as half the ``D_t``-weighted sum over the pairs of
           ``pred(l1) - pred(l0)`` and derives ``self.alpha_t`` from it
           (appended to ``self.alphas``).
        3. Re-weights the entries of ``D_t`` named by ``self.pairs`` and
           re-normalises ``D_t`` to sum to 1.
        4. Predicts on ``self.test_X`` with the first ``i + 1`` rounds and
           prints ``i, precision, recall, error, one_error``.

        Args:
            max_depth: Forwarded unchanged to ``self.model(max_depth=...)``.

        NOTE(review): ``self.k`` is presumably the number of class labels
        (it is the length of the indicator vectors indexed through
        ``self.class_list``) -- confirm against the constructor.
        """
        # Only the last `at_n` entries of each predicted label list are turned
        # into positive predictions for the precision/recall evaluation.
        at_n = 3

        for i in range(self.T):
            pred_y = np.array([])  # k*m once every learner has predicted
            # Train the weak learners using the current distribution D_t.
            for j in range(self.k):
                # NOTE(review): `presort` is rejected by recent scikit-learn
                # tree estimators -- confirm what `self.model` is.
                self.h_t = self.model(max_depth=max_depth, presort=True)
                self.h_t.fit(self.X,
                             self.y[j * self.m:(j + 1) * self.m],
                             sample_weight=np.sum(self.D_t, axis=1)[:, j])
                self.h.append(self.h_t)
                pred_y = np.append(pred_y, self.h_t.predict(self.X))

            tmp_sum = 0
            for (x_i, l0, l1) in self.pairs:
                tmp_sum += self.D_t[x_i][l0][l1] * (pred_y[l1 * self.m + x_i] -
                                                    pred_y[l0 * self.m + x_i])
            r_t = tmp_sum * 0.5

            # Choose alpha_t. At |r_t| == 1 the plain ratio is either a
            # division by zero (r_t == 1) or log(0) (r_t == -1), so both
            # endpoints use the smoothed form.
            if abs(abs(r_t) - 1) < 0.00000001:
                self.alpha_t = 0.5 * log(
                    (1 + r_t + 0.000001) / (1 - r_t + 0.000001))
            else:
                self.alpha_t = 0.5 * log((1 + r_t) / (1 - r_t))

            self.alphas = np.append(self.alphas, self.alpha_t)

            # Update the weighted pairs, then renormalise D_t to sum to 1.
            # Kept as an explicit loop so repeated triples in self.pairs are
            # re-weighted once per occurrence, exactly as before.
            for (x_i, l0, l1) in self.pairs:
                self.D_t[x_i][l0][l1] = self.D_t[x_i][l0][l1] * exp(
                    0.5 * self.alpha_t *
                    (pred_y[l0 * self.m + x_i] - pred_y[l1 * self.m + x_i]))
            self.D_t /= np.sum(self.D_t)

            # predict() returns a pair here; only the first element is used.
            ret_index, _ = self.predict(self.test_X, i + 1)
            # one_error must see the raw predictions, before they are
            # overwritten with indicator vectors below.
            scores = one_error(ret_index, self.test_y)

            y_true = []
            for j, ranking in enumerate(ret_index):
                predicted = [0] * self.k
                for pos, label in enumerate(ranking):
                    if pos < self.k - at_n:
                        continue
                    predicted[self.class_list.index(label)] = 1
                ret_index[j] = predicted

                actual = [0] * self.k
                for label in self.test_y[j]:
                    actual[self.class_list.index(label)] = 1
                y_true.append(actual)

            precision, recall, error = evaluate(np.array(ret_index),
                                                np.array(y_true))
            print(i, precision, recall, error, scores)