Esempio n. 1
0
    def test_sorted_stratified_k2(self):
        """Check the exact folds produced for 2 splits with a fixed seed.

        With ``random_state=9`` the two folds are expected to mirror each
        other: the train targets of the first fold are the test targets of
        the second, and vice versa. The returned indices are applied to both
        ``self.y`` and ``self.X``; the test expects the selected rows of
        ``self.X`` to equal the selected targets laid out as one column.
        """
        # another split scheme
        k = SortedStratifiedKFold(2, shuffle=True, random_state=9)
        split1, split2 = [9.1, 7.1, 5.1, 3.1, 1.1], [8.1, 2.1, 6.1, 4.1]
        expected = [(split1, split2), (split2, split1)]

        # Materialize the folds before comparing: zip() stops at the shorter
        # input, so a splitter yielding too few folds would otherwise let
        # this test pass without checking anything.
        folds = list(k.split(self.X, self.y))
        self.assertEqual(len(folds), len(expected))

        for (train, test), (exp_train, exp_test) in zip(folds, expected):
            assert_array_equal(self.y[train], exp_train)
            assert_array_equal(self.y[test], exp_test)
            # Expected targets reshaped to a single column for the X comparison.
            assert_array_equal(self.X[train], np.array(exp_train)[:, np.newaxis])
            assert_array_equal(self.X[test], np.array(exp_test)[:, np.newaxis])
Esempio n. 2
0
    def test_sorted_stratified_k3(self):
        """Check the exact folds produced for 3 splits with a fixed seed.

        Each entry of ``expected`` pairs the target values expected in the
        train part of a fold with those expected in its test part, in the
        order the folds are yielded. The returned indices are applied to both
        ``self.y`` and ``self.X``; the test expects the selected rows of
        ``self.X`` to equal the selected targets laid out as one column.
        """
        k = SortedStratifiedKFold(3, shuffle=True, random_state=9)
        expected = [
            ([9.1, 8.1, 5.1, 3.1, 2.1, 4.1], [7.1, 1.1, 6.1]),
            ([7.1, 8.1, 1.1, 2.1, 6.1, 4.1], [9.1, 5.1, 3.1]),
            ([9.1, 7.1, 5.1, 3.1, 1.1, 6.1], [8.1, 2.1, 4.1]),
        ]

        # Materialize the folds and verify their number first: iterating the
        # generator directly would pass vacuously if fewer folds (or none)
        # were produced.
        folds = list(k.split(self.X, self.y))
        self.assertEqual(len(folds), len(expected))

        for (train, test), (exp_train, exp_test) in zip(folds, expected):
            assert_array_equal(self.y[train], exp_train)
            assert_array_equal(self.y[test], exp_test)
            # Expected targets reshaped to a single column for the X comparison.
            assert_array_equal(self.X[train], np.array(exp_train)[:, np.newaxis])
            assert_array_equal(self.X[test], np.array(exp_test)[:, np.newaxis])
Esempio n. 3
0
    def test_sorted_stratified(self):
        """Check structural properties of the folds for 3 and 2 splits.

        The splitter shuffles without a fixed seed, so only properties that
        must hold for every shuffle are verified: the test-fold size, that
        train and test indices together cover every sample exactly once, and
        the cumulative count of test targets below a series of cutoffs.
        """
        n_samples = self.y.shape[0]
        every_index = np.arange(n_samples)

        for n_folds in (3, 2):
            splitter = SortedStratifiedKFold(n_folds, shuffle=True)
            # Smallest admissible test-fold size; a fold may hold one more.
            base_size = int(n_samples / n_folds)

            for train_idx, test_idx in splitter.split(self.X, self.y):
                # the test fold has either the base size or one extra sample
                fold_size = test_idx.shape[0]
                self.assertTrue(fold_size in (base_size, base_size + 1))

                # train and test together contain each index exactly once
                combined = np.sort(np.concatenate([train_idx, test_idx]))
                assert_array_equal(combined, every_index)

                # exactly `rank` test targets lie at or below the rank-th
                # cutoff, i.e. one test value per consecutive bin
                # NOTE(review): presumably relies on the fixture targets being
                # 1.1, 2.1, ... so bins of width n_folds align - confirm
                fold_targets = self.y[test_idx]
                for rank in range(1, n_folds + 1):
                    upper_bound = rank * n_folds + 0.1
                    n_at_or_below = np.sum(fold_targets <= upper_bound)
                    self.assertEqual(n_at_or_below, rank)