Example #1
def accuracy_test(num_folds):
    features, classes = loaddata()
    folds = generate_k_folds((features, classes), num_folds)
    for i in range(num_folds):
        print('Testing model on fold %d' % i)
        myClassifier = ChallengeClassifier()
        training_data, test_data = folds[i]
        training_features, training_classes = training_data
        test_features, test_classes = test_data
        myClassifier.fit(training_features, training_classes)
        result = myClassifier.classify(test_features)
        print('Accuracy test result - fold %d: %f' % (i, accuracy(result, test_classes)))
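The snippet above relies on helpers defined elsewhere in the module (loaddata, ChallengeClassifier, accuracy) whose implementations are not shown here. As a rough sketch, the accuracy(result, test_classes) call presumably computes the fraction of predictions that match the true labels, along these lines:

# Hypothetical sketch of the accuracy() helper used above; the real module's
# version may differ, but the call site implies a simple fraction of matches.
def accuracy(predictions, true_classes):
    """Return the fraction of predictions equal to the corresponding true class."""
    correct = sum(1 for p, t in zip(predictions, true_classes) if p == t)
    return correct / len(true_classes)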
Example #2
    def test_k_folds_training_set_count(self):
        """Test k folds returns the correct training set size.

        Asserts:
            training set size matches as expected.
        """

        example_count = len(self.train_features)
        k = 10
        training_set_count = example_count - (example_count // k)
        ten_folds = dt.generate_k_folds(self.dataset, k)

        for fold in ten_folds:
            training_set, test_set = fold

            assert len(training_set[0]) == training_set_count
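Both examples assume that generate_k_folds(dataset, k) returns a list of k (training_set, test_set) pairs, where each set is itself a (features, classes) tuple and the training set keeps n - n // k examples, as the assertion above checks. A minimal sketch consistent with that contract (not the graded implementation) could look like this:

import random

def generate_k_folds(dataset, k):
    # Sketch only: each test fold gets n // k examples, so the training set
    # keeps n - n // k of them, matching the size assertion in the test above.
    features, classes = dataset
    examples = list(zip(features, classes))
    random.shuffle(examples)
    fold_size = len(examples) // k
    folds = []
    for i in range(k):
        test = examples[i * fold_size:(i + 1) * fold_size]
        train = examples[:i * fold_size] + examples[(i + 1) * fold_size:]
        train_features, train_classes = zip(*train)
        test_features, test_classes = zip(*test)
        folds.append(((list(train_features), list(train_classes)),
                      (list(test_features), list(test_classes))))
    return folds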
Example #3
def test_clf(params):
    dataset = dt.load_csv('challenge_train.csv', 0)
    # pdb.set_trace()
    train_features, train_classes = dataset
    folds = dt.generate_k_folds(dataset, 5)
    accuracy = []

    for idx, fold in enumerate(folds):
        training_set, test_set = fold
        clf = dt.ChallengeClassifier(**params)
        clf.fit(training_set[0], training_set[1])
        preds = clf.classify(test_set[0])
        accuracy.append(dt.accuracy(preds, test_set[1]))
        # print("Fold %d" %idx)
        # print("accuracy %f" %(dt.accuracy(preds, test_set[1])))
        # print("precision %f" %(dt.precision(preds, test_set[1])))
        # print("recall %f" %(dt.recall(preds, test_set[1])))
    print(params, np.mean(accuracy))
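A caller could sweep test_clf over a small grid of hyper-parameters. The parameter names below (num_trees, depth_limit) are illustrative assumptions, since ChallengeClassifier's constructor arguments are not shown in these examples:

# Hypothetical driver; the keys in param_grid are guesses, not taken from dt.
if __name__ == '__main__':
    param_grid = [
        {'num_trees': 5, 'depth_limit': 4},
        {'num_trees': 10, 'depth_limit': 6},
    ]
    for params in param_grid:
        test_clf(params)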