def test_perfect_checkerboard(): model = SpectralBiclustering(3, svd_method="arpack", random_state=0) S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1) S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1) S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_spectral_biclustering(): """Test Kluger methods on a checkerboard dataset.""" param_grid = {'method': ['scale', 'bistochastic', 'log'], 'svd_method': ['randomized', 'arpack'], 'n_svd_vecs': [None, 20], 'mini_batch': [False, True], 'init': ['k-means++'], 'n_init': [10], 'n_jobs': [1]} random_state = 0 S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=random_state) for mat in (S, csr_matrix(S)): for kwargs in ParameterGrid(param_grid): model = SpectralBiclustering(n_clusters=3, random_state=random_state, **kwargs) if issparse(mat) and kwargs['method'] == 'log': # cannot take log of sparse matrix assert_raises(ValueError, model.fit, mat) continue else: model.fit(mat) assert_equal(model.rows_.shape, (9, 30)) assert_equal(model.columns_.shape, (9, 30)) assert_array_equal(model.rows_.sum(axis=0), np.repeat(3, 30)) assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_spectral_coclustering(): """Test Dhillon's Spectral CoClustering on a simple problem.""" param_grid = {'svd_method': ['randomized', 'arpack'], 'n_svd_vecs': [None, 20], 'mini_batch': [False, True], 'init': ['k-means++'], 'n_init': [10], 'n_jobs': [1]} random_state = 0 S, rows, cols = make_biclusters((30, 30), 3, noise=0.5, random_state=random_state) S -= S.min() # needs to be nonnegative before making it sparse S = np.where(S < 1, 0, S) # threshold some values for mat in (S, csr_matrix(S)): for kwargs in ParameterGrid(param_grid): model = SpectralCoclustering(n_clusters=3, random_state=random_state, **kwargs) model.fit(mat) assert_equal(model.rows_.shape, (3, 30)) assert_array_equal(model.rows_.sum(axis=0), np.ones(30)) assert_array_equal(model.columns_.sum(axis=0), np.ones(30)) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_spectral_biclustering(): """Test Kluger methods on a checkerboard dataset.""" param_grid = {'method': ['scale', 'bistochastic', 'log'], 'svd_method': ['randomized', 'arpack'], 'n_svd_vecs': [None, 20], 'mini_batch': [False, True], 'init': ['k-means++'], 'n_init': [3], 'n_jobs': [1]} random_state = 0 S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=random_state) for mat in (S, csr_matrix(S)): for kwargs in ParameterGrid(param_grid): model = SpectralBiclustering(n_clusters=3, random_state=random_state, **kwargs) if issparse(mat) and kwargs['method'] == 'log': # cannot take log of sparse matrix assert_raises(ValueError, model.fit, mat) continue else: model.fit(mat) assert_equal(model.rows_.shape, (9, 30)) assert_equal(model.columns_.shape, (9, 30)) assert_array_equal(model.rows_.sum(axis=0), np.repeat(3, 30)) assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_perfect_checkerboard(): raise SkipTest("This test is failing on the buildbot, but cannot" " reproduce. Temporarily disabling it until it can be" " reproduced and fixed.") model = SpectralBiclustering(3, svd_method="arpack", random_state=0) S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1) S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1) S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_spectral_biclustering(): """Test Kluger methods on a checkerboard dataset.""" S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=0) non_default_params = {'method': ['scale', 'log'], 'svd_method': ['arpack'], 'n_svd_vecs': [20], 'mini_batch': [True]} for mat in (S, csr_matrix(S)): for param_name, param_values in non_default_params.items(): for param_value in param_values: model = SpectralBiclustering( n_clusters=3, n_init=3, init='k-means++', random_state=0, ) model.set_params(**dict([(param_name, param_value)])) if issparse(mat) and model.get_params().get('method') == 'log': # cannot take log of sparse matrix assert_raises(ValueError, model.fit, mat) continue else: model.fit(mat) assert_equal(model.rows_.shape, (9, 30)) assert_equal(model.columns_.shape, (9, 30)) assert_array_equal(model.rows_.sum(axis=0), np.repeat(3, 30)) assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)