def check_boosting():
    """Grade a student `boosting` module on a tiny 1-D toy problem.

    Builds a fixed pool of decision stumps, trains the student's AdaBoost
    and LogitBoost for 1 and 3 rounds, and compares predictions against
    known-correct outputs.

    Returns:
        float: 0, 0.5, or 1.0 — 0.5 per boosting variant that matches.
    """
    np.random.seed(5)

    # Candidate weak learners: stumps on feature 0 with sign s and threshold b.
    test_classifiers = set()
    s_set = {1, -1}
    b_set = {1, 3, 5, 7, 9, 11}
    for s in s_set:
        for b in b_set:
            test_classifiers.add(DecisionStump(s, b, 0))

    test_features = [[2], [4], [6], [8], [10]]
    test_labels = [1, 1, -1, -1, 1]

    try:
        import boosting
    except ImportError:
        # Submission missing entirely -> zero for the whole section.
        # (Was a bare `except:`, which would also swallow KeyboardInterrupt.)
        return 0

    try:
        test_ada1 = boosting.AdaBoost(test_classifiers, 1)
        test_ada2 = boosting.AdaBoost(test_classifiers, 3)
        test_ada1.train(test_features, test_labels)
        test_ada2.train(test_features, test_labels)
        predictions_ada1 = test_ada1.predict(test_features)
        predictions_ada2 = test_ada2.predict(test_features)
        if (predictions_ada1 == [1, 1, -1, -1, -1]
                and predictions_ada2 == [1, 1, -1, -1, 1]):
            score_ada = 0.5
        else:
            score_ada = 0
    except Exception:
        # Any crash inside the student's AdaBoost earns 0 for this half.
        score_ada = 0

    try:
        test_logit1 = boosting.LogitBoost(test_classifiers, 1)
        test_logit2 = boosting.LogitBoost(test_classifiers, 3)
        test_logit1.train(test_features, test_labels)
        test_logit2.train(test_features, test_labels)
        predictions_logit1 = test_logit1.predict(test_features)
        predictions_logit2 = test_logit2.predict(test_features)
        if (predictions_logit1 == [1, 1, -1, -1, -1]
                and predictions_logit2 == [1, 1, -1, -1, 1]):
            score_logit = 0.5
        else:
            score_logit = 0
    except Exception:
        # Any crash inside the student's LogitBoost earns 0 for this half.
        score_logit = 0

    return round(score_ada + score_logit, 1)
# print(parts[:i]+parts[i+1:]) d, lb = list( map(np.array, zip(*(sum(parts[:i] + parts[i + 1:], start=[]))))) self.classifiers[i].fit(d, lb) # print(parts[i]) acc = np.mean([ int(self.classifiers[i].predict(data_point) == label) for data_point, label in parts[i] ]) accuracies.append(acc) return np.mean(accuracies) if __name__ == '__main__': iris = datasets.load_iris() vectorized_map = np.vectorize(lambda x, mean: x > mean) tmp = iris['data'] data = np.array( [vectorized_map(tmp[:, i], np.mean(tmp[:, i])) for i in range(4)]).T labels = iris['target'] data, labels = list(zip(*shuffle(list(zip(data[:99], labels[:99]))))) data = np.array(data) labels = np.array(labels) classifier = boosting.AdaBoost(DecisionTreeClassifier, 5, .5, sklearn=True) labels = classifier.change_labels(labels, possible_labels={1, 0, -1}) validator = CrossValidation(5, classifier) print(validator.evaluate(data, labels))
assert(len(y_test) == 20) # set classifiers h_set = set() s_set = {1, -1} b_set = set(np.linspace(0, 10, 51)) d_set = {0, 1, 2, 3} for s in s_set: for b in b_set: for d in d_set: h_set.add(decision_stump.DecisionStump(s,b,d)) # training Adas = [] for idx, T in enumerate([10, 20, 30]): Adas.append(boosting.AdaBoost(h_set, T=T)) Adas[idx].train(X_train, y_train) # testing Ada_preds = [] Ada_accus = [] for Ada in Adas: pred = Ada.predict(X_test) Ada_preds.append(pred) Ada_accus.append(accuracy_score(pred, y_test)) print('AdaBoost testing accuracies:', Ada_accus) # save json.dump({'Ada_preds': Ada_preds, 'Ada_accus': Ada_accus},
    # NOTE(review): this chunk starts mid-docstring — the enclosing `def`
    # and the docstring's opening quotes are outside the visible span;
    # indentation below is reconstructed.
    d in {0, 1}
    '''
    # data
    features_1, labels_1 = data_loader.toy_data_1()

    # clfs: every decision stump over sign s, threshold b, dimension d
    h_set = set()
    s_set = {1, -1}
    b_set = {-2, -0.5, 0.5, 2}
    d_set = {0, 1}
    for s in s_set:
        for b in b_set:
            for d in d_set:
                h_set.add(decision_stump.DecisionStump(s, b, d))

    # boost: a single round (T=1) of AdaBoost on the toy data
    Ada_1 = boosting.AdaBoost(h_set, T=1)
    Ada_1.train(features_1, labels_1)

    # check: report the first picked stump and verify its beta weight is 0
    print('━━━━━━━━━━ Toy example 1 ━━━━━━━━━━')
    print('This toy example checks the format. Any of the stump is correct.')
    print('(Can you explain why?)')
    print('Ada_1: s = {:01d}, b = {:.1f}, d = {:01d}'.format(
        Ada_1.clfs_picked[0].s, Ada_1.clfs_picked[0].b, Ada_1.clfs_picked[0].d))
    if Ada_1.betas[0] == 0:
        print('Betas are correct')
    else:
        print('▁▂▃▄▅▆▇█ Betas are not correct █▇▆▅▄▃▂▁')
    #################