def test_k_folds_decision_tree(self):
    """Smoke test: fit and classify a fresh decision tree on every fold."""
    folds = dt.generate_k_folds(self.dataset, 20)
    for fold in folds:
        tree = dt.DecisionTree()
        # Each fold is a (training_set, test_set) pair of (features, classes).
        training_features, training_classes = fold[0]
        tree.fit(training_features, training_classes)
        test_features, test_classes = fold[1]
        output = tree.classify(test_features)
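The snippet above only relies on DecisionTree exposing fit and classify. A minimal sketch of that assumed interface (the stub below is hypothetical and simply predicts the most common training class, not an actual tree):

class DecisionTreeSketch:
    """Sketch only: the fit/classify interface the test above assumes.

    The real dt.DecisionTree builds an actual tree; this hypothetical stub
    just predicts the majority training class to make the API concrete.
    """

    def __init__(self):
        self.default_class = None

    def fit(self, features, classes):
        # Remember the majority class seen during training.
        self.default_class = max(set(classes), key=list(classes).count)

    def classify(self, features):
        # Predict the majority class for every example.
        return [self.default_class for _ in features]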
def accuracy_test(num_folds):
    """Print per-fold accuracy of a ChallengeClassifier over num_folds folds."""
    features, classes = loaddata()
    folds = generate_k_folds((features, classes), num_folds)
    for i in range(num_folds):
        print('Testing model on fold %d' % i)
        myClassifier = ChallengeClassifier()
        training_data, test_data = folds[i]
        training_features, training_classes = training_data
        test_features, test_classes = test_data
        myClassifier.fit(training_features, training_classes)
        result = myClassifier.classify(test_features)
        print('Accuracy test result - fold %d: %f' %
              (i, accuracy(result, test_classes)))
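These tests assume generate_k_folds splits a (features, classes) dataset into k (training_set, test_set) pairs, each again a (features, classes) tuple. A minimal sketch of that assumed contract (the real implementation may shuffle or stratify differently):

import random

def generate_k_folds_sketch(dataset, k):
    """Sketch only: split (features, classes) into k (training, test) folds."""
    features, classes = dataset
    indices = list(range(len(features)))
    random.shuffle(indices)
    fold_size = len(indices) // k
    folds = []
    for i in range(k):
        # Hold out one slice as the test set; train on everything else.
        test_idx = indices[i * fold_size:(i + 1) * fold_size]
        held_out = set(test_idx)
        train_idx = [j for j in indices if j not in held_out]
        training_set = ([features[j] for j in train_idx],
                        [classes[j] for j in train_idx])
        test_set = ([features[j] for j in test_idx],
                    [classes[j] for j in test_idx])
        folds.append((training_set, test_set))
    return folds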
    def test_k_folds_training_set_count(self):
        """Test that k folds returns the correct training set size.

        Asserts:
            each fold's training set has
            example_count - (example_count // k) examples.
        """

        example_count = len(self.train_features)
        k = 10
        expected_training_size = example_count - (example_count // k)
        ten_folds = dt.generate_k_folds(self.dataset, k)

        for fold in ten_folds:
            training_set, test_set = fold
            assert len(training_set[0]) == expected_training_size
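The accuracy helper used in the remaining tests is assumed to return the fraction of predictions that match the true labels; a minimal sketch of that assumption:

def accuracy_sketch(predicted, expected):
    """Sketch only: fraction of predicted labels equal to the true labels."""
    matches = sum(1 for p, e in zip(predicted, expected) if p == e)
    return matches / float(len(expected))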
    def test_forest_all_data(self):
        """Test random forest.
        Asserts:
            classification on average is >= 75%
        """
        # Values specified in the assignment
        forest = dt.RandomForest(5, 5, .5, .5)
        fold_count = 10
        ten_folds = dt.generate_k_folds(self.dataset, fold_count)
        avg_accuracy = 0.0

        for fold in ten_folds:
            training_set, test_set = fold
            forest.fit(training_set[0], training_set[1])
            output = forest.classify(test_set[0])
            avg_accuracy += dt.accuracy(output, test_set[1])

        avg_accuracy = avg_accuracy / fold_count
        print('\n\nRandom Forest K-folds:', avg_accuracy)
        assert avg_accuracy >= .75
    def test_challenge_all_data(self):
        """Test challenge section.
        Asserts:
            classification on average is >= 80%
        """
        dataset = dt.load_csv('challenge_train.csv', 0)

        # Change as you see fit: pass the parameters you have chosen, or run
        # it with the defaults.
        classifier = dt.ChallengeClassifier()
        fold_count = 10
        ten_folds = dt.generate_k_folds(dataset, fold_count)
        avg_accuracy = 0.0

        for fold in ten_folds:
            training_set, test_set = fold
            classifier.fit(training_set[0], training_set[1])
            output = classifier.classify(test_set[0])
            avg_accuracy += dt.accuracy(output, test_set[1])

        avg_accuracy = avg_accuracy / fold_count
        print('\n\nChallenger K-folds:', avg_accuracy)
        assert avg_accuracy >= .80
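load_csv is assumed to parse the CSV into a (features, classes) tuple, with its second argument naming the column that holds the class label; a minimal sketch under that assumption:

import csv

def load_csv_sketch(path, class_index):
    """Sketch only: read a numeric CSV into (features, classes), assuming the
    column at class_index holds the class label and all others are features."""
    features, classes = [], []
    with open(path, 'r') as handle:
        for row in csv.reader(handle):
            if not row:
                continue
            values = [float(v) for v in row]
            # Pull the label column out; whatever remains is the feature row.
            classes.append(values.pop(class_index))
            features.append(values)
    return features, classes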