ylabel='Accuracy %',
           title='Accuracy with bagging(Base={})'.format(100 *
                                                         (1 - base_accuracy)))
    ax.legend()
    for xy in zip(num_samples, error_rates):
        ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='data')
    fig.savefig('../data/bagging_accuracy_result.png')


if __name__ == '__main__':
    training_data = data_loader.load_diabetes_train_from_file()
    testing_data = data_loader.load_diabetes_test_from_file()

    # Testing 1 DTree
    tree = DTree(sample=False)
    tree.train(training_data)
    tree.test(testing_data)
    print('Error rate for single learner is {}'.format(
        100 * (1 - tree.last_error_rate)))

    # Testing ensemble
    error_rates = []
    for num_sample in [1, 3, 5, 10, 20]:
        for max_depth in [None, 1, 2, 3, 6]:
            bagger = Bagging(num_samples=num_sample, max_depth=max_depth)
            bagger.train_ensemble(training_data)
            error_rate = bagger.test(testing_data)[0]
            if max_depth is None:
                error_rates.append(error_rate)
            print('Num Samples: {} Depth: {} Accuracy: {}'.format(
                num_sample, max_depth, 100 * (1 - error_rate)))
Example #2
0
 def train_ensemble(self, training_data):
     for i in range(self._num_samples):
         curr_tree = DTree(sample=True, max_depth=self._max_depth)
         curr_tree.train(training_data)
         self._learners.append(curr_tree)