def train(self, max_depth=3):
    """Run T rounds of discrete AdaBoost.MH boosting.

    Each round fits one weak learner per class: class j is trained on the
    j-th length-m slice of the flattened label vector ``self.y`` (length
    k*m), weighted by the matching slice of the distribution ``self.D_t``.
    After every round the ensemble is evaluated on the held-out test split
    and per-round metrics are printed.

    Args:
        max_depth: maximum tree depth passed to the weak-learner factory
            ``self.model``.
    """
    for i in range(self.T):
        pred_y = np.array([])  # concatenated per-class predictions, length k*m
        # Train one weak learner per class using distribution D_t.
        for j in range(self.k):
            # NOTE(review): `presort` was removed from sklearn trees in
            # 0.24 -- confirm the pinned sklearn version still accepts it.
            self.h_t = self.model(max_depth=max_depth, presort=True)
            self.h_t.fit(self.X, self.y[j * self.m:(j + 1) * self.m],
                         sample_weight=self.D_t[j * self.m:(j + 1) * self.m])
            # Keep weak hypothesis h_t so predict() can combine the ensemble.
            self.h.append(self.h_t)
            pred_y = np.append(pred_y, self.h_t.predict(self.X))
        # r_t in [-1, 1] is the D_t-weighted agreement between labels and
        # predictions; alpha_t = 0.5 * ln((1 + r_t) / (1 - r_t)).
        r_t = np.dot(self.D_t, np.multiply(self.y, pred_y))
        if abs(abs(r_t) - 1) < 0.00000001:
            # BUGFIX: smooth near BOTH r_t ~ +1 and r_t ~ -1. The original
            # guard only covered +1, so a perfectly anti-correlated round
            # (r_t ~ -1) hit log(0) and raised a math domain error.
            self.alpha_t = 0.5 * log(
                (1 + r_t + 0.000001) / (1 - r_t + 0.000001))
        else:
            self.alpha_t = 0.5 * log((1 + r_t) / (1 - r_t))
        self.alphas = np.append(self.alphas, self.alpha_t)
        # Re-weight the distribution toward misclassified entries, then
        # renormalise so D_t stays a probability distribution.
        self.D_t = np.multiply(
            self.D_t,
            list(map(exp, -self.alpha_t * np.multiply(self.y, pred_y))))
        self.D_t /= np.sum(self.D_t)

        # ---- per-round evaluation on the held-out test split ----------
        ret_index = self.predict(self.test_X, i + 1)
        scores = one_error(ret_index, self.test_y)
        y_true = []   # binarised ground-truth test labels (k-hot rows)
        at_n = 3      # keep only the top-3 ranked labels as positives
        for j in range(len(ret_index)):
            tmp = [0] * self.k
            for ll in ret_index[j]:
                # NOTE(review): .index() finds the FIRST occurrence; this
                # assumes the ranked label list has no duplicates -- confirm.
                if ret_index[j].index(ll) < self.k - at_n:
                    continue
                tmp[self.class_list.index(ll)] = 1
            ret_index[j] = tmp  # replace ranking with its k-hot encoding
            tmp = [0] * self.k
            for ll in self.test_y[j]:
                tmp[self.class_list.index(ll)] = 1
            y_true.append(tmp)
        precision, recall, error = evaluate(np.array(ret_index),
                                            np.array(y_true))
        print(i, precision, recall, error, scores)
def SDSS():
    """Train discrete AdaBoost.MH on the SDSS data set and print one-error.

    Loads ``data/SDSS.csv``, holds out 40% as a test split, fits a 50-round
    booster, echoes each prediction next to its ground truth, then prints
    the overall one-error score.
    """
    path = 'data/SDSS.csv'
    x, y, class_list = load_data(path)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.4, random_state=42)
    # NOTE(review): no explicit train() call here -- presumably the
    # constructor trains; verify against the class definition.
    clf = discrete_Adaboost_MH(X_train, y_train, class_list, T=50)
    ret_index = clf.predict(X_test)
    for predicted, truth in zip(ret_index, y_test):
        print(predicted)
        print(truth)
    scores = one_error(ret_index, y_test, all_=True)
    print('----------SDSS one error-----------\n', scores)
def yeast():
    """Train discrete AdaBoost.MR on the yeast data set and print one-error.

    Loads the pre-split ``data/yeast`` data, fits a 100-round MR booster,
    and reports the one-error score on the test split.
    """
    path = 'data/yeast'
    X_train, y_train, X_test, y_test, class_list = load_data(path)
    clf = discrete_Adaboost_MR(X_train, y_train, X_test, y_test,
                               class_list, T=100)
    ret_index, pred = clf.predict(X_test, clf.T)
    scores = one_error(ret_index, y_test)
    print('----------yeast one error on test-----------\n', scores)
def yeast():
    """Train discrete AdaBoost.MH on the yeast data set and print one-error.

    NOTE(review): this file defines ``yeast`` twice (an MR variant and this
    MH variant); the definition that appears later in the file shadows the
    earlier one -- confirm which variant is meant to run.
    """
    path = 'data/yeast'
    X_train, y_train, X_test, y_test, class_list = load_data(path)
    clf = discrete_Adaboost_MH(X_train, y_train, X_test, y_test,
                               class_list, T=200)
    ret_index = clf.predict(X_test, clf.T)
    for predicted, truth in zip(ret_index, y_test):
        print(predicted)
        print(truth)
    scores = one_error(ret_index, y_test, all_=True)
    print('----------yeast one error-----------\n', scores)
def train(self, max_depth=3):
    """Run T rounds of discrete AdaBoost.MR (ranking-loss) boosting.

    Each round fits one weak learner per class on the class-slice of the
    flattened label vector ``self.y`` (length k*m). The distribution
    ``self.D_t`` is indexed as [example][label0][label1] over the critical
    label pairs in ``self.pairs``; r_t is accumulated over those pairs
    only. After every round the ensemble is evaluated on the held-out
    test split and per-round metrics are printed.

    Args:
        max_depth: maximum tree depth passed to the weak-learner factory
            ``self.model``.
    """
    for i in range(self.T):
        pred_y = np.array([])  # concatenated per-class predictions, length k*m
        # Train one weak learner per class using distribution D_t.
        for j in range(self.k):
            # NOTE(review): `presort` was removed from sklearn trees in
            # 0.24 -- confirm the pinned sklearn version still accepts it.
            self.h_t = self.model(max_depth=max_depth, presort=True)
            # Per-example weight for class j: marginalise D_t over one
            # label axis, then take column j (assumes D_t is m x k x k).
            self.h_t.fit(self.X, self.y[j * self.m:(j + 1) * self.m],
                         sample_weight=np.sum(self.D_t, axis=1)[:, j])
            # Keep weak hypothesis h_t so predict() can combine the ensemble.
            self.h.append(self.h_t)
            pred_y = np.append(pred_y, self.h_t.predict(self.X))
        # r_t = 0.5 * sum over critical pairs of D_t-weighted margin
        # between the higher-ranked and lower-ranked label predictions.
        tmp_sum = 0
        for (x_i, l0, l1) in self.pairs:
            tmp_sum += self.D_t[x_i][l0][l1] * (
                pred_y[l1 * self.m + x_i] - pred_y[l0 * self.m + x_i])
        r_t = tmp_sum * 0.5
        if abs(abs(r_t) - 1) < 0.00000001:
            # BUGFIX: smooth near BOTH r_t ~ +1 and r_t ~ -1. The original
            # guard only covered +1, so a perfectly anti-correlated round
            # (r_t ~ -1) hit log(0) and raised a math domain error.
            self.alpha_t = 0.5 * log(
                (1 + r_t + 0.000001) / (1 - r_t + 0.000001))
        else:
            self.alpha_t = 0.5 * log((1 + r_t) / (1 - r_t))
        self.alphas = np.append(self.alphas, self.alpha_t)
        # Re-weight each critical pair by the exponential of its (signed)
        # margin, then renormalise D_t to a probability distribution.
        for (x_i, l0, l1) in self.pairs:
            self.D_t[x_i][l0][l1] = self.D_t[x_i][l0][l1] * exp(
                0.5 * self.alpha_t * (pred_y[l0 * self.m + x_i]
                                      - pred_y[l1 * self.m + x_i]))
        self.D_t /= np.sum(self.D_t)

        # ---- per-round evaluation on the held-out test split ----------
        ret_index, pred = self.predict(self.test_X, i + 1)
        scores = one_error(ret_index, self.test_y)
        y_true = []   # binarised ground-truth test labels (k-hot rows)
        at_n = 3      # keep only the top-3 ranked labels as positives
        for j in range(len(ret_index)):
            tmp = [0] * self.k
            for ll in ret_index[j]:
                # NOTE(review): .index() finds the FIRST occurrence; this
                # assumes the ranked label list has no duplicates -- confirm.
                if ret_index[j].index(ll) < self.k - at_n:
                    continue
                tmp[self.class_list.index(ll)] = 1
            ret_index[j] = tmp  # replace ranking with its k-hot encoding
            tmp = [0] * self.k
            for ll in self.test_y[j]:
                tmp[self.class_list.index(ll)] = 1
            y_true.append(tmp)
        precision, recall, error = evaluate(np.array(ret_index),
                                            np.array(y_true))
        print(i, precision, recall, error, scores)