import numpy as np
from tqdm import tqdm
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Adaboost and ClassificationTree are defined in this project


def main():
    print('--- Adaboost ---')
    data = datasets.load_digits()
    X, y = data.data, data.target

    # Reduce to a binary problem: keep only the samples of two digit classes
    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Relabel to {1, -1}, the label set Adaboost expects
    y[y == digit1] = 1
    y[y == digit2] = -1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = Adaboost(n_estimators=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Single decision tree as a baseline for comparison
    clf_tree = ClassificationTree()
    clf_tree.fit(X_train, y_train)
    y_pred_tree = clf_tree.predict(X_test)
    acc_tree = accuracy_score(y_test, y_pred_tree)

    print("Adaboost_Accuracy:", acc)
    print("Tree_Accuracy:", acc_tree)
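main() builds the booster with Adaboost(n_estimators=5), while fit() below reads self.n_clfs. The constructor is not shown in this section, so the following is a minimal sketch that assumes it simply stores the estimator count under that attribute name:

class Adaboost:
    def __init__(self, n_estimators=5):
        # Number of weak classifiers to train; fit() reads this as self.n_clfs
        # (hypothetical constructor, inferred from how main() and fit() use the class)
        self.n_clfs = n_estimators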
def fit(self, X, y):
    n_samples, n_features = X.shape
    # Initialize every sample with equal weight
    w = np.full(n_samples, 1 / n_samples)
    # Store each weak classifier, its polarity, and its vote weight
    self.clfs = []
    self.polarity = np.ones(self.n_clfs)
    self.alphas = np.zeros(self.n_clfs)
    for i in tqdm(range(self.n_clfs)):
        # Instantiate and train one weak classifier
        clf = ClassificationTree()
        clf.fit(X, y)
        y_pred = clf.predict(X)
        # Weighted training error: total weight of the misclassified samples
        error = np.sum(w[y != y_pred])
        # This Adaboost handles only binary classification, so a classifier
        # with error > 0.5 can have its predictions flipped, giving a
        # flipped error of 1 - error < 0.5
        if error > 0.5:
            self.polarity[i] = -1
            error = 1 - error
        # Vote weight of this classifier; the small constant avoids division by zero
        self.alphas[i] = 0.5 * np.log((1.0 - error) / (error + 1e-10))
        # Apply the polarity to get the effective predictions
        predictions = self.polarity[i] * y_pred
        # Raise the weights of misclassified samples, lower the rest, then normalize
        w *= np.exp(-self.alphas[i] * y * predictions)
        w /= np.sum(w)
        self.clfs.append(clf)
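main() also calls clf.predict(X_test), which is not shown in this section. Below is a minimal sketch consistent with the attributes fit() stores (self.clfs, self.polarity, self.alphas): each weak classifier casts a vote of alpha_i * polarity_i * prediction_i, and the sign of the weighted sum is the final label.

def predict(self, X):
    # Hypothetical predict, inferred from the state built up in fit()
    # Accumulate the weighted, polarity-corrected vote of every weak classifier
    agg = np.zeros(X.shape[0])
    for i, clf in enumerate(self.clfs):
        agg += self.alphas[i] * self.polarity[i] * clf.predict(X)
    # Final label in {1, -1} is the sign of the aggregated vote
    # (np.sign returns 0 on an exact tie, which is left unhandled in this sketch)
    return np.sign(agg)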