Example #1
0
    def test_kfolds_should_not_have_repeated_indexes(self):
        """Check that no index occurs twice within a single fold.

        For every fold the train and test indexes are joined with ``+`` and
        the joined length is compared with the number of distinct values.
        """
        n_rows = 30
        n_folds = 10

        folds = kfolds(n_rows, n_folds)

        for i in range(n_folds):
            # NOTE(review): assumes the indexes are list-like so that `+`
            # concatenates them -- confirm against kfolds.
            indexes = folds[i].train_index + folds[i].test_index
            # assertEqual (instead of assertTrue on a comparison) reports
            # both counts when the check fails.
            self.assertEqual(len(indexes), len(set(indexes)))
Example #2
0
    def test_kfolds_should_split_data_evenly_if_n_rows_equals_n_folds(self):
        """Check fold sizes when the fold count equals the row count.

        With 10 rows and 10 folds, 10 folds are expected, each holding
        1 train index and 9 test indexes.
        """
        size = 10

        result = kfolds(size, size)

        self.assertEqual(len(result), size)

        for position in range(size):
            current = result[position]
            self.assertEqual(len(current.train_index), 1)
            self.assertEqual(len(current.test_index), 9)
Example #3
0
    def test_kfolds_should_split_data_evenly(self):
        """Check fold sizes when the rows divide evenly among the folds.

        With 30 rows and 10 folds, 10 folds are expected, each holding
        3 train indexes and 27 test indexes.
        """
        total_rows, fold_count = 30, 10
        expected_train_size, expected_test_size = 3, 27

        result = kfolds(total_rows, fold_count)

        self.assertEqual(len(result), fold_count)

        for position in range(fold_count):
            current = result[position]
            self.assertEqual(len(current.train_index), expected_train_size)
            self.assertEqual(len(current.test_index), expected_test_size)
Example #4
0
    def test_kfolds_should_account_for_uneven_data(self):
        """Check fold sizes when the rows do not divide evenly.

        With 29 rows and 10 folds, the first nine folds are expected to hold
        3 train / 26 test indexes and the last fold 2 train / 27 test.
        """
        total_rows, fold_count = 29, 10

        result = kfolds(total_rows, fold_count)

        self.assertEqual(len(result), fold_count)

        # (train size, test size) expected at each fold position; only the
        # final fold differs from the rest.
        expected_sizes = [(3, 26)] * (fold_count - 1) + [(2, 27)]

        for position, (train_size, test_size) in enumerate(expected_sizes):
            current = result[position]
            self.assertEqual(len(current.train_index), train_size)
            self.assertEqual(len(current.test_index), test_size)
Example #5
0
    def test_kfolds_should_not_generate_test_folds_with_repeated_indexes(self):
        """Check that the folds together yield n_rows distinct indexes.

        The train indexes of all folds are merged into one set and the test
        indexes into another; each set must end up with exactly 30 values.
        """
        total_rows, fold_count = 30, 10

        result = kfolds(total_rows, fold_count)

        seen_in_train = set()
        seen_in_test = set()

        for position in range(fold_count):
            current = result[position]
            seen_in_train.update(current.train_index)
            seen_in_test.update(current.test_index)

        self.assertEqual(len(seen_in_train), total_rows)
        self.assertEqual(len(seen_in_test), total_rows)
Example #6
0
def cross_validate(data, n_folds, n_iterations):
    """Run k-fold cross-validation of a Perceptron and print the accuracies.

    Args:
        data: Dataset object. Its ``features`` and ``feature_names``
            attributes are read here, and the object itself is handed to the
            perceptron's ``train`` and ``test`` methods.
        n_folds: Number of folds requested from ``kfolds``; also the divisor
            used for the average accuracy.
        n_iterations: Passed through unchanged to ``Perceptron.train``.

    Returns:
        None. One ``Accuracy`` line is printed per fold, followed by one
        ``Average accuracy`` line.
    """
    # TODO Should I make it so you can pass in a ML algorithm?
    # TODO Should this be a part of the perceptron?
    n_columns = get_n_columns(data.features)
    folds = kfolds(get_n_rows(data.features), n_folds)

    sum_accuracy = 0

    for fold in folds:
        # Build a fresh model for every fold. Reusing one instance would let
        # whatever it learned on earlier folds (whose training rows include
        # this fold's test rows) influence this fold's score, unless train()
        # resets its state -- which is not visible from here.
        perceptron = Perceptron(n_columns, data.feature_names)

        perceptron.train(fold.train_index, data, n_iterations)
        accuracy = perceptron.test(fold.test_index, data)
        sum_accuracy += accuracy

        print('Accuracy: %.9f' % accuracy)

    print('Average accuracy: %.9f' % (sum_accuracy / n_folds))
Example #7
0
    def test_kfolds_should_throw_exception_if_not_enough_data(self):
        """Requesting 10 folds from only 9 rows must raise ValueError."""
        too_few_rows = 9
        fold_count = 10

        self.assertRaises(ValueError, kfolds, too_few_rows, fold_count)