Example #1
import unittest

# AdaBoost is assumed to come from the module under test; X and y are the
# toy fixtures defined alongside these tests.
class TestBoost(unittest.TestCase):
    def setUp(self):
        self.clf = AdaBoost(n_learners=3)
        self.clf.fit(X, y)

    def testAll(self):

        # ===================================================== 
        # prediction test 
        # ===================================================== 
        pred = self.clf.predict(X)
        for (p, yi) in zip(pred, y):
            self.assertAlmostEqual(p, yi)

        # ===================================================== 
        # score test 
        # ===================================================== 
        score = self.clf.score(X, y)
        self.assertAlmostEqual(score, 1.0)

        # ===================================================== 
        # alphas test 
        # ===================================================== 
        alphas = self.clf.alpha
        self.assertAlmostEqual(alphas[0], 0.42364893019360172)
        self.assertAlmostEqual(alphas[1], 0.64964149206513044)
        self.assertAlmostEqual(alphas[2], 0.92291334524916524)

        # ===================================================== 
        # staged_score test
        # ===================================================== 
        staged_score = self.clf.staged_score(X, y)
        self.assertAlmostEqual(staged_score[0], 0.7)
        self.assertAlmostEqual(staged_score[1], 0.7)
        self.assertAlmostEqual(staged_score[2], 1.0)
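
The alpha values asserted above follow the standard AdaBoost learner weight,
alpha_m = 0.5 * ln((1 - err_m) / err_m). A minimal check of the first
assertion, assuming only that the first weak learner's weighted error is 0.3
(consistent with staged_score[0] == 0.7 in the staged_score test):

import math

def adaboost_alpha(err):
    # Standard AdaBoost learner weight: 0.5 * ln((1 - err) / err)
    return 0.5 * math.log((1.0 - err) / err)

print(adaboost_alpha(0.3))  # 0.42364893019360184, matching alphas[0]
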
Example #2
import matplotlib.pyplot as plt
from sklearn.naive_bayes import MultinomialNB

# AdaBoost and FoursAndNines are assumed to come from the accompanying
# boost module of this assignment.
def Question3():
    num_learners = [1, 10, 50, 100, 300]

    data = FoursAndNines("../data/mnist.pkl.gz")
    print('Size of training set', data.x_train.shape[0])
    print('Size of validation set', data.x_valid.shape[0])
    accuracy = []
    clf = AdaBoost(n_learners=max(num_learners), base=MultinomialNB())

    print('Fitting classifier...')
    clf.fit(data.x_train, data.y_train)

    print('Calculating scores...')
    # Score with successively longer prefixes of the fitted learner list
    # instead of refitting an ensemble for every size.
    temp_learners = clf.learners
    for num_learner in num_learners:
        clf.learners = temp_learners[:num_learner]
        print('Number of learners', len(clf.learners))
        score = clf.score(data.x_valid, data.y_valid)
        print('Score at iteration %d is %f' % (num_learner, score))
        accuracy.append(score)

    plt.figure()  # fresh figure so repeated runs don't draw over old axes
    plt.plot(num_learners, accuracy, 'o')

    plt.title(
        'Accuracy of Multinomial Naive Bayes\nvs the number of weak learners')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of weak learners')
    plt.savefig('q3.png')
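
Truncating clf.learners to a prefix avoids refitting a separate ensemble for
each size. scikit-learn's AdaBoostClassifier exposes the same idea through its
staged_score generator; a minimal sketch of the equivalent measurement, reusing
the data object above (the base-learner keyword is estimator in
scikit-learn >= 1.2 and base_estimator in older releases):

from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import MultinomialNB

sk_clf = AdaBoostClassifier(estimator=MultinomialNB(), n_estimators=300)
sk_clf.fit(data.x_train, data.y_train)
# staged_score yields the validation accuracy after each boosting stage.
staged = list(sk_clf.staged_score(data.x_valid, data.y_valid))
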
Example #3
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier

# AdaBoost and FoursAndNines are assumed to come from the accompanying
# boost module of this assignment.
def Question2():
    depths = [1, 2, 3, 4]
    num_learners = [1, 10, 50, 100, 300]

    data = FoursAndNines("../data/mnist.pkl.gz")
    print('Size of training set', data.x_train.shape[0])
    print('Size of validation set', data.x_valid.shape[0])
    plt.figure()  # fresh figure so repeated runs don't draw over old axes
    for depth in depths:
        print('Tree depth', depth)
        accuracy = []
        clf = AdaBoost(n_learners=max(num_learners),
                       base=DecisionTreeClassifier(max_depth=depth,
                                                   criterion="entropy"))

        print('Fitting classifier...')
        clf.fit(data.x_train, data.y_train)

        print('Calculating scores...')
        # Score with successively longer prefixes of the fitted learner list
        # instead of refitting an ensemble for every size.
        temp_learners = clf.learners
        for num_learner in num_learners:
            clf.learners = temp_learners[:num_learner]
            print('Number of learners', len(clf.learners))
            # score = clf.score(data.x_valid, data.y_valid)
            score = clf.score(data.x_train, data.y_train)  # training accuracy, per the title
            print('Score at iteration %d with depth %d is %f' % (num_learner,
                                                                 depth, score))
            accuracy.append(score)

        plt.plot(num_learners, accuracy, 'o', label=str(depth))

    plt.title(
        'Accuracy of decision tree boosting vs the\nnumber of weak learners at a certain depth with training set'
    )
    plt.ylabel('Accuracy')
    plt.xlabel('Number of weak learners')
    plt.legend(title='Depth', numpoints=1)
    plt.savefig('q2_train_accuracy.png')
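
The in-place truncation in both questions assumes the classifier pairs each
learner with its alpha by position when predicting, so a shortened learner
list simply drops the trailing alphas too. A minimal sketch of a weighted-vote
predict consistent with that assumption (alpha and learners are hypothetical
attribute names taken from the examples above; labels assumed in {-1, +1}):

import numpy as np

def weighted_vote_predict(clf, X):
    # zip() stops at the shorter sequence, so truncating clf.learners
    # implicitly discards the corresponding trailing alphas as well.
    votes = sum(a * h.predict(X) for a, h in zip(clf.alpha, clf.learners))
    return np.sign(votes)
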