def setUp(self):
    """Build the fixtures used by the tests in this case.

    Populates:
        self.restaurant: dict of small hand-written 0/1 label lists under
            the keys 'restaurants', 'split_patrons' and 'split_food_type'.
        self.dataset, self.train_features, self.train_classes: the value
            returned by ``dt.load_csv('part23_data.csv')`` and its two
            unpacked members.
        self.challenge_set, self.challenge_set_features,
        self.challenge_set_classes: the same for
            ``dt.load_csv('challenge_train.csv', 0)``.
    """
    # NOTE(review): presumably a parent label vector plus two candidate
    # splits of it -- confirm against the tests that read self.restaurant.
    parent_labels = [0] * 6 + [1] * 6
    patron_groups = [[0, 0], [1, 1, 1, 1], [1, 1, 0, 0, 0, 0]]
    food_type_groups = [[0, 1], [0, 1], [0, 0, 1, 1], [0, 0, 1, 1]]
    self.restaurant = dict(
        restaurants=parent_labels,
        split_patrons=patron_groups,
        split_food_type=food_type_groups
    )

    # load_csv results are unpacked as a (features, classes) pair.
    part23 = dt.load_csv('part23_data.csv')
    self.dataset = part23
    self.train_features, self.train_classes = part23

    # NOTE(review): the meaning of the second load_csv argument (0) is not
    # visible from here -- check dt.load_csv.
    challenge = dt.load_csv('challenge_train.csv', 0)
    self.challenge_set = challenge
    self.challenge_set_features, self.challenge_set_classes = challenge
def test_random_forest_5_trees(self):
    """Fit ChallengeClassifier on the challenge data and print its metrics.

    Loads ``challenge_train.csv`` (resolved to an absolute path), stores
    the loaded pair on ``self.train_features`` / ``self.train_classes``,
    fits a default ``dt.ChallengeClassifier`` on it, classifies the same
    rows it was fitted on, and prints the raw output followed by the
    confusion matrix, accuracy, precision and recall.

    NOTE(review): this test contains no assertions, so it can only fail
    if one of the calls raises. The printed numbers are computed on the
    same data used for fitting.
    NOTE(review): the name mentions "5 trees" but the classifier is built
    with its defaults -- confirm that matches the intent.
    """
    path = abspath("challenge_train.csv")
    self.train_features, self.train_classes = dt.load_csv(path, 0)

    learner = dt.ChallengeClassifier()
    learner.fit(self.train_features, self.train_classes)
    output = learner.classify(self.train_features)

    # Single-argument print(...) is valid on both Python 2 and Python 3
    # and produces the same text as the former print statements.
    print(output)
    result = dt.confusion_matrix(output, self.train_classes)
    print("\n\nconfusion_matrix={}".format(result))
    print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
    print("precision={}".format(dt.precision(output, self.train_classes)))
    print("recall={}".format(dt.recall(output, self.train_classes)))
def test_challenge_all_data(self):
    """Test challenge section with 10-fold cross-validation.

    Loads ``challenge_train.csv``, splits it into ``fold_count`` folds via
    ``dt.generate_k_folds``, fits the classifier on each fold's training
    part, scores it on the fold's test part, and averages the accuracies.

    Asserts:
        classification accuracy, averaged over the folds, is >= 80%
    """
    dataset = dt.load_csv('challenge_train.csv', 0)

    # Change as you see fit by adding parameters you have chosen or run
    # it with defaults.
    # NOTE(review): the same classifier instance is refit for every fold;
    # this is only correct if fit() discards any previously learned state
    # -- confirm against dt.ChallengeClassifier.fit.
    tree = dt.ChallengeClassifier()

    fold_count = 10
    ten_folds = dt.generate_k_folds(dataset, fold_count)

    accuracy_total = 0.0
    for training_set, test_set in ten_folds:
        # Each part of a fold is indexed as (features, classes).
        tree.fit(training_set[0], training_set[1])
        output = tree.classify(test_set[0])
        accuracy_total += dt.accuracy(output, test_set[1])

    avg_accuracy = accuracy_total / fold_count

    # One pre-joined string keeps the output identical to the former
    # Python 2 ``print a, b`` statement while also being valid Python 3.
    print('\n\nChallenger K-folds: ' + str(avg_accuracy))

    assert avg_accuracy >= .80
def setUp(self):
    """Create the fixtures for the vectorization tests.

    Populates:
        self.vector: a fresh ``dt.Vectorization`` instance.
        self.data: the value returned by
            ``dt.load_csv('vectorize.csv', 1)``.
    """
    vectorizer = dt.Vectorization()
    self.vector = vectorizer

    # NOTE(review): the meaning of the second load_csv argument (1) is not
    # visible from here -- check dt.load_csv.
    loaded = dt.load_csv('vectorize.csv', 1)
    self.data = loaded