예제 #1
0
def accuracy_test(num_folds):
    """Fit and score a fresh ChallengeClassifier on each of ``num_folds`` folds.

    The data returned by ``loaddata()`` is split with ``generate_k_folds``.
    For every fold index a new classifier is fitted on that fold's training
    part, used to classify the fold's test part, and the resulting accuracy
    is printed.

    Args:
        num_folds: number of folds to generate and evaluate.
    """
    all_features, all_classes = loaddata()
    folds = generate_k_folds((all_features, all_classes), num_folds)
    for fold_index in range(num_folds):
        print('Testing model on fold %d' % fold_index)
        # A new classifier is built for every fold, so nothing fitted on an
        # earlier fold is carried over into this one.
        classifier = ChallengeClassifier()
        (train_x, train_y), (test_x, test_y) = folds[fold_index]
        classifier.fit(train_x, train_y)
        predictions = classifier.classify(test_x)
        score = accuracy(predictions, test_y)
        print('Accuracy test result - fold %d: %f' % (fold_index, score))
예제 #2
0
    def test_random_forest_5_trees(self):
        """Fit a 5-tree random forest and print its training-set metrics.

        The forest is fitted on ``self.train_features`` / ``self.train_classes``
        and then classifies the same features, so the printed confusion
        matrix, accuracy, precision and recall are training-set figures.
        Nothing is asserted; the output is for manual inspection.
        """
        learner = dt.RandomForest(num_trees=5,
                                  depth_limit=5,
                                  example_subsample_rate=0.5,
                                  attr_subsample_rate=0.5)
        learner.fit(self.train_features, self.train_classes)
        output = learner.classify(self.train_features)

        result = dt.confusion_matrix(output, self.train_classes)
        # Single-argument print(...) emits the same text on Python 2 and is
        # valid syntax on Python 3, unlike the bare print statement.
        print("\n\nconfusion_matrix={}".format(result))
        print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
        print("precision={}".format(dt.precision(output, self.train_classes)))
        print("recall={}".format(dt.recall(output, self.train_classes)))
    def test_accuracy_calculation(self):
        """Test accuracy calculation.

        Starting from an all-wrong answer vector, one more entry is set to
        the true label on each iteration and the reported accuracy is checked
        against the fraction of correct entries so far.

        Asserts:
            Accuracy matches for all true labels.
        """

        answer = [0, 0, 0, 0, 0]
        true_label = [1, 1, 1, 1, 1]
        total_count = len(answer)

        for index in range(total_count):
            answer[index] = 1
            accuracy = dt.accuracy(answer, true_label)

            # Force float division: on Python 2, int / int truncates, which
            # would make the expected value 0 for every step but the last.
            expected = float(index + 1) / total_count
            assert accuracy == expected
    def test_forest_all_data(self):
        """Test random forest with 10-fold cross-validation.

        A forest is fitted on each fold's training part and scored on that
        fold's test part; the per-fold accuracies are averaged.

        Asserts:
            classification on average is >= 75%
        """
        fold_count = 10
        ten_folds = dt.generate_k_folds(self.dataset, fold_count)
        avg_accuracy = 0.0

        for fold in ten_folds:
            training_set, test_set = fold
            # Build a fresh forest per fold so no state from an earlier fit
            # (which was trained on this fold's test rows) can influence the
            # score. Values specified in the assignment.
            tree = dt.RandomForest(5, 5, .5, .5)
            tree.fit(training_set[0], training_set[1])
            output = tree.classify(test_set[0])
            avg_accuracy += dt.accuracy(output, test_set[1])

        avg_accuracy = avg_accuracy / fold_count
        # One pre-formatted argument prints identically on Python 2 and 3.
        print('\n\nRandom Forest K-folds: {}'.format(avg_accuracy))
        assert avg_accuracy >= .75
 def test_random_forest_5_trees(self):
     """Fit a ChallengeClassifier on challenge_train.csv and print its metrics.

     Despite the method name, this exercises ``dt.ChallengeClassifier``.
     The CSV is loaded into ``self.train_features`` / ``self.train_classes``,
     the classifier is fitted on that data and then classifies the same
     features, so the printed figures are training-set figures. Nothing is
     asserted; the output is for manual inspection.
     """
     path = abspath("challenge_train.csv")
     # NOTE(review): the second argument presumably selects the class
     # column -- confirm against dt.load_csv.
     self.train_features, self.train_classes = dt.load_csv(path, 0)
     learner = dt.ChallengeClassifier()
     learner.fit(self.train_features, self.train_classes)
     output = learner.classify(self.train_features)
     # Single-argument print(...) emits the same text on Python 2 and is
     # valid syntax on Python 3, unlike the bare print statement.
     print(output)
     result = dt.confusion_matrix(output, self.train_classes)
     print("\n\nconfusion_matrix={}".format(result))
     print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
     print("precision={}".format(dt.precision(output, self.train_classes)))
     print("recall={}".format(dt.recall(output, self.train_classes)))
    def test_challenge_all_data(self):
        """Test challenge section with 10-fold cross-validation.

        A classifier is fitted on each fold's training part and scored on
        that fold's test part; the per-fold accuracies are averaged.

        Asserts:
            classification on average is >= 80%
        """
        dataset = dt.load_csv('challenge_train.csv', 0)

        fold_count = 10
        ten_folds = dt.generate_k_folds(dataset, fold_count)
        avg_accuracy = 0.0

        for fold in ten_folds:
            training_set, test_set = fold
            # Build a fresh classifier per fold so no state from an earlier
            # fit (which was trained on this fold's test rows) can influence
            # the score.
            #  Change as you see fit by adding parameters you have chosen or
            #  run it with defaults
            tree = dt.ChallengeClassifier()
            tree.fit(training_set[0], training_set[1])
            output = tree.classify(test_set[0])
            avg_accuracy += dt.accuracy(output, test_set[1])

        avg_accuracy = avg_accuracy / fold_count
        # One pre-formatted argument prints identically on Python 2 and 3.
        print('\n\nChallenger K-folds: {}'.format(avg_accuracy))
        assert avg_accuracy >= .80