class TestBoost(unittest.TestCase): def setUp(self): self.clf = AdaBoost(n_learners=3) self.clf.fit(X, y) def testAll(self): # ===================================================== # prediction test # ===================================================== pred = self.clf.predict(X) for (p, yi) in zip(pred, y): self.assertAlmostEqual(p, yi) # ===================================================== # score test # ===================================================== score = self.clf.score(X, y) self.assertAlmostEqual(score, 1.0) # ===================================================== # alphas test # ===================================================== alphas = self.clf.alpha self.assertAlmostEqual(alphas[0], 0.42364893019360172) self.assertAlmostEqual(alphas[1], 0.64964149206513044) self.assertAlmostEqual(alphas[2], 0.92291334524916524) # ===================================================== # stage_score test # ===================================================== staged_score = self.clf.staged_score(X, y) self.assertAlmostEqual(staged_score[0], 0.7) self.assertAlmostEqual(staged_score[1], 0.7) self.assertAlmostEqual(staged_score[2], 1.0)
def Question3(): depths = [1, 2, 3, 4] num_learners = [1, 10, 50, 100, 300] data = FoursAndNines("../data/mnist.pkl.gz") print 'Size of training set', data.x_train.shape[0] print 'Size of validation set', data.x_valid.shape[0] accuracy = [] clf = AdaBoost(n_learners=max(num_learners), base=MultinomialNB()) print 'Fitting classifier...' clf.fit(data.x_train, data.y_train) print 'Calculating scores...' temp_learners = clf.learners for num_learner in num_learners: clf.learners = temp_learners[:num_learner] print 'Number of learners', len(clf.learners) score = clf.score(data.x_valid, data.y_valid) print 'Score at iteration %d is %f' % (num_learner, score) accuracy.append(score) plt.plot(num_learners, accuracy, 'o') plt.title( 'Accuracy of Multinomial Naive Bayes\nvs the number of weak learners') plt.ylabel('Accuracy') plt.xlabel('Number of weak learners') plt.savefig('q3.png')
def Question2(): depths = [1, 2, 3, 4] num_learners = [1, 10, 50, 100, 300] data = FoursAndNines("../data/mnist.pkl.gz") print 'Size of training set', data.x_train.shape[0] print 'Size of validation set', data.x_valid.shape[0] for depth in depths: print 'Tree depth', depth accuracy = [] clf = AdaBoost(n_learners=max(num_learners), base=DecisionTreeClassifier(max_depth=depth, criterion="entropy")) print 'Fitting classifier...' clf.fit(data.x_train, data.y_train) print 'Calculating scores...' temp_learners = clf.learners for num_learner in num_learners: clf.learners = temp_learners[:num_learner] print 'Number of learners', len(clf.learners) # score = clf.score(data.x_valid, data.y_valid) score = clf.score(data.x_train, data.y_train) print 'Score at iteration %d with depth %d is %f' % (num_learner, depth, score) accuracy.append(score) plt.plot(num_learners, accuracy, 'o', label=str(depth)) plt.title( 'Accuracy of decision tree boosting vs the\nnumber of weak learners at a certain depth with training set' ) plt.ylabel('Accuracy') plt.xlabel('Number of weak learners') plt.legend(title='Depth', numpoints=1) plt.savefig('q2_train_accuracy.png')
def setUp(self): self.clf = AdaBoost(n_learners=3) self.clf.fit(X, y)