def accuracy_test(num_folds):
    """Run k-fold cross-validation with ChallengeClassifier and print per-fold accuracy."""
    features, classes = loaddata()
    folds = generate_k_folds((features, classes), num_folds)
    for i in range(num_folds):
        print('Testing model on fold %d' % i)
        myClassifier = ChallengeClassifier()
        # Each fold is a (training_data, test_data) pair of (features, classes) tuples.
        training_data, test_data = folds[i]
        training_features, training_classes = training_data
        test_features, test_classes = test_data
        myClassifier.fit(training_features, training_classes)
        result = myClassifier.classify(test_features)
        print('Accuracy test result - fold %d: %f' % (i, accuracy(result, test_classes)))
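# A minimal sketch of the fold structure accuracy_test expects from
# generate_k_folds: a list of (training_data, test_data) pairs, each a
# (features, classes) tuple. The name and the handling of leftover examples
# here are assumptions for illustration, not the assignment's implementation.
import numpy as np

def generate_k_folds_sketch(dataset, k):
    features, classes = np.asarray(dataset[0]), np.asarray(dataset[1])
    fold_size = len(features) // k
    folds = []
    for i in range(k):
        # Examples in [i*fold_size, (i+1)*fold_size) form the test fold;
        # everything else (including any remainder) goes to training.
        test_idx = np.arange(i * fold_size, (i + 1) * fold_size)
        train_idx = np.setdiff1d(np.arange(len(features)), test_idx)
        folds.append(((features[train_idx], classes[train_idx]),
                      (features[test_idx], classes[test_idx])))
    return folds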
def test_k_folds_training_set_count(self):
    """Test k folds returns the correct training set size.

    Asserts:
        training set size matches as expected.
    """
    example_count = len(self.train_features)
    k = 10
    training_set_count = example_count - (example_count // k)
    ten_folds = dt.generate_k_folds(self.dataset, k)
    for fold in ten_folds:
        training_set, test_set = fold
        assert len(training_set[0]) == training_set_count
import numpy as np

def test_clf(params):
    """Cross-validate ChallengeClassifier with the given hyperparameters and print mean accuracy."""
    dataset = dt.load_csv('challenge_train.csv', 0)
    # pdb.set_trace()
    train_features, train_classes = dataset
    folds = dt.generate_k_folds(dataset, 5)
    accuracies = []
    for idx, fold in enumerate(folds):
        training_set, test_set = fold
        clf = dt.ChallengeClassifier(**params)
        clf.fit(training_set[0], training_set[1])
        preds = clf.classify(test_set[0])
        accuracies.append(dt.accuracy(preds, test_set[1]))
        # print("Fold %d" % idx)
        # print("accuracy %f" % dt.accuracy(preds, test_set[1]))
        # print("precision %f" % dt.precision(preds, test_set[1]))
        # print("recall %f" % dt.recall(preds, test_set[1]))
    print(params, np.mean(accuracies))
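# Example sweep driver for test_clf above. The 'depth_limit' keyword is only
# an illustration of how the params dict is forwarded to ChallengeClassifier;
# the classifier's actual constructor arguments may differ.
if __name__ == '__main__':
    for depth in (3, 5, 7, 10):
        test_clf({'depth_limit': depth})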