def test_train(self):
    """train() on the simple data set should stop after 3 weak classifiers.

    Checks the ensemble size and the parameters (dimension, inequality,
    threshold, alpha) of the last weak classifier against known values.
    """
    print("Test Train:")
    data_mat, labels_arr = adb.load_simple_data()
    # 9 is the maximum number of boosting iterations; training converges
    # (zero training error) after 3 stumps on this toy data set.
    classifies_arr, est_agg = adb.train(data_mat, labels_arr, 9)
    self.assertEqual(len(classifies_arr), 3)
    # Parameters of the last weak classifier.
    self.assertEqual(0, classifies_arr[-1]['dim'])
    self.assertEqual('lt', classifies_arr[-1]['ineq'])
    self.assertEqual(0.9, classifies_arr[-1]['thresh'])
    self.assertEqual(0.8958797346, round(classifies_arr[-1]['alpha'], 10))
def test_build_stump(self):
    """build_stump() should find the known best decision stump.

    With uniform sample weights on the simple data set, the best stump
    splits dimension 0 at threshold 1.3 with weighted error 0.2.
    """
    print("Test Build Stump:")
    data_mat, labels_arr = adb.load_simple_data()
    # Uniform initial weight for each of the 5 samples.
    w_data = np.mat(np.ones((5, 1)) / 5)
    best_stump, min_error, best_estimation = adb.build_stump(
        data_mat, labels_arr, w_data)
    self.assertEqual(best_stump, {'dim': 0, 'ineq': 'lt', 'thresh': 1.3})
    self.assertEqual(min_error, np.matrix([[0.2]]))
    res = np.array([[-1.], [1.], [-1.], [-1.], [1.]])
    self.assertEqual(True, (best_estimation == res).all())
def test_classify(self):
    """classify() should reproduce known predictions and error rate.

    First checks two hand-picked points on the simple data set, then
    trains on the horse-colic training file and verifies the error rate
    on the 67-sample test file, and finally checks the ROC/AUC value.
    """
    print("Test Classify:")
    print("Classify Simple Data:")
    data_mat, labels_arr = adb.load_simple_data()
    classifies_arr, est_agg = adb.train(data_mat, labels_arr, 30)
    pred = adb.classify([[5, 5], [0, 0]], classifies_arr)
    res = np.matrix([[1.], [-1.]])
    self.assertEqual(True, (pred == res).all())

    print("Classify Loaded Data:")
    dat_arr, label_arr = adb.load_data_set('horseColicTraining2.txt')
    classifier_arr, agg_class_est = adb.train(dat_arr, label_arr, 10)
    test_arr, test_label_arr = adb.load_data_set('horseColicTest2.txt')
    prediction10 = adb.classify(test_arr, classifier_arr)
    # Boolean mask of mis-predictions selects 1s to count the errors
    # among the 67 test samples.
    err_arr = np.mat(np.ones((67, 1)))
    err_rate = err_arr[prediction10 != np.mat(test_label_arr).T].sum() / 67
    self.assertEqual(16.0 / 67, err_rate)
    print("Test Error: %f%%" % (err_rate * 100))
    # Plot the ROC curve and compute the AUC (on the training estimates).
    val_auc = adb.plot_roc(agg_class_est, label_arr)
    self.assertLessEqual(0.8582969635, round(val_auc, 10))
D = np.multiply(D, np.exp(expon)) D = D / np.sum(D) agg_class_est += alpha * predict_value print("agg_class_est: ", agg_class_est.T) # np.sign(agg_class_est) != np.mat(class_labels).T 将错误的置位1 对的置位0 便于统计错误的个数 agg_errors = np.multiply( np.sign(agg_class_est) != np.mat(class_labels).T, np.ones((m, 1))) error_rate = agg_errors.sum() / m print('total error:', error_rate) if error_rate == 0: break return weak_class_arr if __name__ == '__main__': # 加载简单的数据 dat_arr, labels_arr = adaboost.load_simple_data() # show_plot(dat_arr, labels_arr) m = np.shape(dat_arr)[0] D = np.mat(np.ones((m, 1)) / m) # 最佳单层决策树 # best_stump, error, predict_value, = build_stump(dat_arr, labels_arr, D) # print(best_stump, error) # print(predict_value) weak_class_arr = adaboost_train_ds(dat_arr, labels_arr) print('********' * 10) print(weak_class_arr)
agg_error = np.multiply(np.sign(agg_class_est) != np.mat(class_labels).T, np.ones((m, 1))) error_rate = agg_error.sum()/m <<<<<<< HEAD print "total error: ", error_rate, "\n" ======= # print "total error: ", error_rate, "\n" >>>>>>> 68d745d9f4a5c0b75e4d74d7d47153b544aab2b9 if error_rate == 0: break return weak_class_arr <<<<<<< HEAD if __name__ == '__main__': import adaboost data_mat, class_labels = adaboost.load_simple_data() a = adaboost.adaboost_train(data_mat, class_labels, 9) print a ======= def ada_classify(datto_class, classifier_arr): data_mat = np.matrix(datto_class) m = data_mat.shape[0] agg_class_est = np.zeros((m, 1)) for i in range(len(classifier_arr)): class_est = stump_classify(data_mat, classifier_arr[i]["dim"], classifier_arr[i]["threshold"], classifier_arr[i]["inequality"]) agg_class_est += classifier_arr[i]["alpha"] * class_est # print agg_class_est # print np.sign(agg_class_est) return np.sign(agg_class_est)