def test_resample_stratified_replace():
    """Check that stratified resampling honours the ``replace`` parameter."""
    rng = np.random.RandomState(0)
    n_rows = 100
    X = rng.normal(size=(n_rows, 1))
    y = rng.randint(0, 2, size=n_rows)

    # Both draws request 50 rows and consume the same ``rng`` in sequence,
    # so the with-replacement draw must stay first.
    shared = dict(n_samples=50, random_state=rng, stratify=y)
    drawn_with, _ = resample(X, y, replace=True, **shared)
    drawn_without, _ = resample(X, y, replace=False, **shared)

    # With replacement some rows are expected to repeat; without, none may.
    assert len(np.unique(drawn_with)) < 50
    assert len(np.unique(drawn_without)) == 50

    # make sure n_samples can be greater than X.shape[0] if we sample with
    # replacement
    oversampled, _ = resample(
        X, y, replace=True, n_samples=1000, random_state=rng, stratify=y
    )
    assert len(oversampled) == 1000
    assert len(np.unique(oversampled)) == 100
def test_resample():
    """Check border cases and argument validation of ``resample``."""
    # Border case not worth mentioning in doctests
    assert resample() is None

    # Check that invalid arguments yield ValueError
    # Arrays of different lengths.
    with pytest.raises(ValueError):
        resample([0], [0, 1])
    # More samples requested than available while replace=False.
    with pytest.raises(ValueError):
        resample([0, 1], [0, 1], replace=False, n_samples=3)
    # A keyword argument this test expects to be rejected.
    with pytest.raises(ValueError):
        resample([0, 1], [0, 1], meaning_of_life=42)

    # Issue:6581, n_samples can be more when replace is True (default).
    assert len(resample([1, 2], n_samples=5)) == 5
def test_resample_stratify_2dy():
    """Check that a two-dimensional ``y`` can be used for stratifying."""
    rng = np.random.RandomState(0)
    n_rows = 100
    X = rng.normal(size=(n_rows, 1))
    y = rng.randint(0, 2, size=(n_rows, 2))

    _, y_resampled = resample(
        X, y, n_samples=50, random_state=rng, stratify=y
    )

    # The resampled target must still be two-dimensional.
    assert y_resampled.ndim == 2
def test_resample_stratified():
    """Check that ``resample`` can stratify on the target."""
    rng = np.random.RandomState(0)
    n_rows = 100
    positive_rate = .9
    X = rng.normal(size=(n_rows, 1))
    y = rng.binomial(1, positive_rate, size=n_rows)

    # Without stratification this seeded draw of 10 contains only 1s.
    plain_draw = resample(X, y, n_samples=10, random_state=0, stratify=None)
    y_not_stratified = plain_draw[1]
    assert (y_not_stratified == 1).all()

    # With stratification the draw must include the minority class.
    stratified_draw = resample(X, y, n_samples=10, random_state=0, stratify=y)
    y_stratified = stratified_draw[1]
    assert np.any(y_stratified != 1)
    assert y_stratified.sum() == 9  # all 1s, one 0
def test_resample_stratify_sparse_error():
    """Check that a sparse ``stratify`` argument raises ``TypeError``."""
    rng = np.random.RandomState(0)
    n_rows = 100
    X = rng.normal(size=(n_rows, 2))
    y = rng.randint(0, 2, size=n_rows)

    # Wrap the labels in a CSR matrix to trigger the expected rejection.
    sparse_labels = sp.csr_matrix(y)
    expected_message = 'A sparse matrix was passed'
    with pytest.raises(TypeError, match=expected_message):
        X, y = resample(
            X, y, n_samples=50, random_state=rng, stratify=sparse_labels
        )