def setUp(self):
        """Prepare the fixtures shared by the tests in this case.

        Attributes set on ``self``:
            restaurant: dict of hand-written 0/1 label lists -- a 12-item
                'restaurants' list plus two candidate partitions of it,
                'split_patrons' and 'split_food_type'.
            dataset, train_features, train_classes: the value returned by
                ``dt.load_csv('part23_data.csv')`` and its two unpacked parts.
            challenge_set, challenge_set_features, challenge_set_classes:
                the same for ``dt.load_csv('challenge_train.csv', 0)``.
        """

        # Six 0-labels followed by six 1-labels.
        parent_labels = [0] * 6 + [1] * 6
        patrons_partition = [[0, 0], [1, 1, 1, 1], [1, 1, 0, 0, 0, 0]]
        food_type_partition = [[0, 1], [0, 1], [0, 0, 1, 1], [0, 0, 1, 1]]
        self.restaurant = {
            'restaurants': parent_labels,
            'split_patrons': patrons_partition,
            'split_food_type': food_type_partition,
        }

        # Each load is expected to yield exactly two items; the tuple
        # unpacking below raises ValueError otherwise.
        part23 = dt.load_csv('part23_data.csv')
        self.dataset = part23
        self.train_features, self.train_classes = part23

        # NOTE(review): the second argument (0) presumably selects which
        # column holds the class label -- confirm against dt.load_csv.
        challenge = dt.load_csv('challenge_train.csv', 0)
        self.challenge_set = challenge
        self.challenge_set_features, self.challenge_set_classes = challenge
 def test_random_forest_5_trees(self):
     """Fit the challenge classifier on the challenge data and print metrics.

     Loads ``challenge_train.csv`` (resolved to an absolute path), fits a
     ``dt.ChallengeClassifier`` on all of it, classifies the same rows and
     prints the predictions, confusion matrix, accuracy, precision and
     recall.

     This test contains no assertions, so it only fails if one of the
     calls raises. The scores are computed on the training data itself.

     NOTE(review): the name mentions a 5-tree random forest, but the code
     instantiates ``dt.ChallengeClassifier`` with default arguments --
     confirm which learner is intended.
     """
     path = abspath("challenge_train.csv")
     # Overwrites the train_features/train_classes attributes on this
     # instance with the challenge data.
     # NOTE(review): the second argument (0) presumably selects the class
     # column -- confirm against dt.load_csv.
     self.train_features, self.train_classes = dt.load_csv(path, 0)

     learner = dt.ChallengeClassifier()
     learner.fit(self.train_features, self.train_classes)
     output = learner.classify(self.train_features)

     # Single-argument print(...) behaves identically under the Python 2
     # print statement and the Python 3 print function.
     print(output)
     result = dt.confusion_matrix(output, self.train_classes)
     print("\n\nconfusion_matrix={}".format(result))
     print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
     print("precision={}".format(dt.precision(output, self.train_classes)))
     print("recall={}".format(dt.recall(output, self.train_classes)))
    def test_challenge_all_data(self):
        """Test challenge section.

        Splits ``challenge_train.csv`` into 10 folds with
        ``dt.generate_k_folds``, fits ``dt.ChallengeClassifier`` on each
        training part, scores it on the matching test part with
        ``dt.accuracy`` and averages the scores over the fold count.

        Asserts:
            classification on average is >= 80%
        """
        # NOTE(review): the second argument (0) presumably selects the class
        # column -- confirm against dt.load_csv.
        dataset = dt.load_csv('challenge_train.csv', 0)

        #  Change as you see fit by adding parameters you have chosen or run
        #  it with defaults
        tree = dt.ChallengeClassifier()
        fold_count = 10
        ten_folds = dt.generate_k_folds(dataset, fold_count)
        avg_accuracy = 0.0

        # NOTE(review): the same classifier instance is re-fit on every
        # fold; this is only a fair estimate if fit() discards previously
        # learned state -- confirm against dt.ChallengeClassifier.fit.
        for training_set, test_set in ten_folds:
            tree.fit(training_set[0], training_set[1])
            output = tree.classify(test_set[0])
            avg_accuracy += dt.accuracy(output, test_set[1])

        # Divides by the requested fold count, not by the number of folds
        # actually iterated.
        avg_accuracy = avg_accuracy / fold_count

        # One pre-joined string prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('\n\nChallenger K-folds: ' + str(avg_accuracy))
        assert avg_accuracy >= .80, (
            'average k-fold accuracy {} is below 0.80'.format(avg_accuracy))
    def setUp(self):
        """Create the fixtures built around ``dt.Vectorization``.

        Attributes set on ``self``:
            vector: a new ``dt.Vectorization`` instance.
            data: the value returned by ``dt.load_csv('vectorize.csv', 1)``.
        """

        self.vector = dt.Vectorization()

        # NOTE(review): the meaning of the second argument (1) is not
        # visible here -- confirm against dt.load_csv.
        vectorize_csv = dt.load_csv('vectorize.csv', 1)
        self.data = vectorize_csv