def test_sorted_stratified_k2(self):
    # Another split scheme: two folds. The literal expectations below are
    # tied to random_state=9.
    splitter = SortedStratifiedKFold(2, shuffle=True, random_state=9)
    first_half = [9.1, 7.1, 5.1, 3.1, 1.1]
    second_half = [8.1, 2.1, 6.1, 4.1]
    # Each half is expected once as the training part and once as the
    # test part, in this order.
    expected_folds = [(first_half, second_half), (second_half, first_half)]
    actual_folds = splitter.split(self.X, self.y)
    for (train, test), (want_train, want_test) in zip(actual_folds, expected_folds):
        assert_array_equal(self.y[train], want_train)
        assert_array_equal(self.y[test], want_test)
        # X is compared against the same values laid out as one column.
        assert_array_equal(self.X[train], np.array(want_train).reshape(-1, 1))
        assert_array_equal(self.X[test], np.array(want_test).reshape(-1, 1))
def test_sorted_stratified_k3(self):
    # Three folds. The literal expectations below are tied to
    # random_state=9.
    splitter = SortedStratifiedKFold(3, shuffle=True, random_state=9)
    # One (train, test) pair per fold, in the order the folds are yielded.
    expected_folds = [
        ([9.1, 8.1, 5.1, 3.1, 2.1, 4.1], [7.1, 1.1, 6.1]),
        ([7.1, 8.1, 1.1, 2.1, 6.1, 4.1], [9.1, 5.1, 3.1]),
        ([9.1, 7.1, 5.1, 3.1, 1.1, 6.1], [8.1, 2.1, 4.1]),
    ]
    for fold, (train, test) in enumerate(splitter.split(self.X, self.y)):
        # Indexing (rather than zipping) makes an unexpected extra fold
        # fail loudly instead of being ignored.
        want_train, want_test = expected_folds[fold]
        assert_array_equal(self.y[train], want_train)
        assert_array_equal(self.y[test], want_test)
        # X is compared against the same values laid out as one column.
        assert_array_equal(self.X[train], np.array(want_train).reshape(-1, 1))
        assert_array_equal(self.X[test], np.array(want_test).reshape(-1, 1))
def test_sorted_stratified(self):
    # Structural checks on the folds for k = 3 and k = 2; no random_state
    # is passed, so nothing here compares against literal fold contents.
    n_samples = self.y.shape[0]
    for n_folds in (3, 2):
        splitter = SortedStratifiedKFold(n_folds, shuffle=True)
        for train, test in splitter.split(self.X, self.y):
            # check the size of the test fold
            base_size = n_samples // n_folds
            self.assertIn(test.shape[0], (base_size, base_size + 1))

            # check every data point is either in train or fold and only once
            seen = np.sort(np.concatenate((train, test)))
            assert_array_equal(seen, np.arange(n_samples))

            # check there is a value in each bin in the test fold:
            # exactly i test targets must lie at or below i * k + 0.1.
            # Only the cutoffs for i = 1..k are examined.
            test_targets = self.y[test]
            for bin_no in range(1, n_folds + 1):
                upper = bin_no * n_folds + 0.1
                n_below = np.sum(test_targets <= upper)
                self.assertEqual(n_below, bin_no)