def predict(tr_data_arr, tr_label_arr, test_data_arr, test_label_arr):
    """Train an AdaBoost ensemble on the training set and evaluate it on the test set.

    AdaBoost requires labels in {-1, +1}, so labels are remapped: 1 stays +1,
    anything else (e.g. 0) becomes -1, for both training and test labels.

    Args:
        tr_data_arr: training feature rows (array-like, convertible by np.mat).
        tr_label_arr: training labels; 1 is the positive class.
        test_data_arr: test feature rows.
        test_label_arr: test labels; 0 is remapped to -1.

    Returns:
        (test_data_len, error, accuracy): number of test samples, count of
        misclassified samples, and accuracy as a percentage.
    """
    data_arr = np.mat(tr_data_arr)
    # Remap training labels to {-1, +1}: 1 stays +1, everything else -> -1.
    f_label_arr = [1 if i == 1 else -1 for i in tr_label_arr]
    classifier_arr, agg_class_est = ada_boost_train_ds(
        data_arr, f_label_arr, 30)
    # Test labels arrive as a flat sequence in {0, 1}; reshape to an (m, 1)
    # column matrix to match the shape of ada_classify's predictions.
    test_label_arr = np.mat([[i] for i in test_label_arr])
    test_label_arr[test_label_arr == 0] = -1
    pred_arr = ada_classify(test_data_arr, classifier_arr)
    diff = test_label_arr - pred_arr
    # Any nonzero difference marks a misclassified sample.
    error = (diff != 0).sum()
    test_data_len = len(test_data_arr)
    accuracy = (test_data_len - error) * 100.0 / test_data_len
    return test_data_len, error, accuracy
return best_stump, min_error, best_class_est def ada_classify(dat_to_class, classifier_arr): data_mat = np.mat(dat_to_class) m = data_mat.shape[0] agg_class_est = np.mat(np.zeros((m, 1))) for i in xrange(len(classifier_arr)): class_est = stump_classify(data_mat, classifier_arr[i]['dim'], classifier_arr[i]['thresh'], classifier_arr[i]['ineq']) agg_class_est += classifier_arr[i]['alpha'] * class_est """ 随着迭代进行,分类的结果会越来越强。 即小于0的分类越来越小,大于0的越来越大 """ print("agg_class_est:", agg_class_est) return np.sign(agg_class_est) if __name__ == '__main__': from adaboost import load_simp_data, ada_boost_train_ds data_mat, class_labels = load_simp_data() # D = np.mat(np.ones((5, 1)) / 5) # best_stump, min_error, best_class_est = build_stump(data_mat, class_labels, D) # print best_stump classifier_arr = ada_boost_train_ds(data_mat, class_labels, 30) res = ada_classify([1.3, 1.2], classifier_arr) print("res: ", res)