Beispiel #1
0
def test_perfect_checkerboard():
    # XXX Previously failed on build bot (not reproducible)
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0)
    model.fit(S)
    assert consensus_score(model.biclusters_, (rows, cols)) == 1

    S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0)
    model.fit(S)
    assert consensus_score(model.biclusters_, (rows, cols)) == 1

    S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0)
    model.fit(S)
    assert consensus_score(model.biclusters_, (rows, cols)) == 1
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem.
    param_grid = {
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1]
    }
    random_state = 0
    S, rows, cols = make_biclusters((30, 30),
                                    3,
                                    noise=0.5,
                                    random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3,
                                         random_state=random_state,
                                         **kwargs)
            model.fit(mat)

            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

            _test_shape_indices(model)
Beispiel #3
0
def test_spectral_coclustering():
    """Test Dhillon's Spectral CoClustering on a simple problem."""
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    random_state = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
                                    random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3,
                                         random_state=random_state,
                                         **kwargs)
            model.fit(mat)

            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_,
                                         (rows, cols)), 1)
def test_perfect_checkerboard():
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

    S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

    S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)
def test_consensus_score_issue2445():
    """ Different number of biclusters in A and B"""
    a_rows = np.array([[True, True, False, False], [False, False, True, True], [False, False, False, True]])
    a_cols = np.array([[True, True, False, False], [False, False, True, True], [False, False, False, True]])
    idx = [0, 2]
    s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    assert_almost_equal(s, 2.0 / 3.0)
Beispiel #6
0
def test_consensus_score_issue2445():
    ''' Different number of biclusters in A and B'''
    a_rows = np.array([[True, True, False, False], [False, False, True, True],
                       [False, False, False, True]])
    a_cols = np.array([[True, True, False, False], [False, False, True, True],
                       [False, False, False, True]])
    idx = [0, 2]
    s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    assert_almost_equal(s, 2.0 / 3.0)
Beispiel #7
0
    def bicluster_similarity(self, reference_model):
        """
        Calculates the similarity between the current model of biclusters and the reference model of biclusters

        :param reference_model: The reference model of biclusters
        :return: Returns the consensus score(Hochreiter et. al., 2010), i.e. the similarity of two sets of biclusters.
        """
        similarity_score = consensus_score(self.model.biclusters_,
                                           reference_model.biclusters_)
        return similarity_score
def test_consensus_score():
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]

    assert_equal(consensus_score((a, a), (a, a)), 1)
    assert_equal(consensus_score((a, a), (b, b)), 1)
    assert_equal(consensus_score((a, b), (a, b)), 1)
    assert_equal(consensus_score((a, b), (b, a)), 1)

    assert_equal(consensus_score((a, a), (b, a)), 0)
    assert_equal(consensus_score((a, a), (a, b)), 0)
    assert_equal(consensus_score((b, b), (a, b)), 0)
    assert_equal(consensus_score((b, b), (b, a)), 0)
Beispiel #9
0
def test_consensus_score():
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]

    assert_equal(consensus_score((a, a), (a, a)), 1)
    assert_equal(consensus_score((a, a), (b, b)), 1)
    assert_equal(consensus_score((a, b), (a, b)), 1)
    assert_equal(consensus_score((a, b), (b, a)), 1)

    assert_equal(consensus_score((a, a), (b, a)), 0)
    assert_equal(consensus_score((a, a), (a, b)), 0)
    assert_equal(consensus_score((b, b), (a, b)), 0)
    assert_equal(consensus_score((b, b), (b, a)), 0)
Beispiel #10
0
def test_consensus_score():
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]

    assert consensus_score((a, a), (a, a)) == 1
    assert consensus_score((a, a), (b, b)) == 1
    assert consensus_score((a, b), (a, b)) == 1
    assert consensus_score((a, b), (b, a)) == 1

    assert consensus_score((a, a), (b, a)) == 0
    assert consensus_score((a, a), (a, b)) == 0
    assert consensus_score((b, b), (a, b)) == 0
    assert consensus_score((b, b), (b, a)) == 0
Beispiel #11
0
def test_perfect_checkerboard():
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    S, rows, cols = make_checkerboard((30, 30), 3, noise=0,
                                      random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_,
                                 (rows, cols)), 1)

    S, rows, cols = make_checkerboard((40, 30), 3, noise=0,
                                      random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_,
                                 (rows, cols)), 1)

    S, rows, cols = make_checkerboard((30, 40), 3, noise=0,
                                      random_state=0)
    model.fit(S)
    assert_equal(consensus_score(model.biclusters_,
                                 (rows, cols)), 1)
Beispiel #12
0
def test_spectral_biclustering():
    # Test Kluger methods on a checkerboard dataset.
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=0)

    non_default_params = {'method': ['scale', 'log'],
                          'svd_method': ['arpack'],
                          'n_svd_vecs': [20],
                          'mini_batch': [True]}

    for mat in (S, csr_matrix(S)):
        for param_name, param_values in non_default_params.items():
            for param_value in param_values:

                model = SpectralBiclustering(
                    n_clusters=3,
                    n_init=3,
                    init='k-means++',
                    random_state=0,
                )
                model.set_params(**dict([(param_name, param_value)]))

                if issparse(mat) and model.get_params().get('method') == 'log':
                    # cannot take log of sparse matrix
                    with pytest.raises(ValueError):
                        model.fit(mat)
                    continue
                else:
                    model.fit(mat)

                assert model.rows_.shape == (9, 30)
                assert model.columns_.shape == (9, 30)
                assert_array_equal(model.rows_.sum(axis=0),
                                   np.repeat(3, 30))
                assert_array_equal(model.columns_.sum(axis=0),
                                   np.repeat(3, 30))
                assert consensus_score(model.biclusters_,
                                       (rows, cols)) == 1

                _test_shape_indices(model)
Beispiel #13
0
    def jaccard(self, estimator, train=None):
        """Computes the Jaccard coefficient as a measure of similarity between
        two sets of biclusters.

        The Jaccard index achieves its minimum of zero when the biclusters to
        not overlap at all and its maximum of one when they are identical.

        Args:
            estimator (object): The fitted model holding bicluster esitmates.
            train: Ignored.

        Returns:
            (float): The Jaccard coefficient value.

        """

        rows, cols = estimator.biclusters_
        if len(rows) == 0 or len(cols) == 0:
            return 0.0
        else:
            ytrue = (self._rows[:, self.row_idx], self._cols[:, self.col_idx])
            return consensus_score((rows, cols), ytrue, similarity='jaccard')
def test_spectral_biclustering():
    # Test Kluger methods on a checkerboard dataset.
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=0)

    non_default_params = {'method': ['scale', 'log'],
                          'svd_method': ['arpack'],
                          'n_svd_vecs': [20],
                          'mini_batch': [True]}

    for mat in (S, csr_matrix(S)):
        for param_name, param_values in non_default_params.items():
            for param_value in param_values:

                model = SpectralBiclustering(
                    n_clusters=3,
                    n_init=3,
                    init='k-means++',
                    random_state=0,
                )
                model.set_params(**dict([(param_name, param_value)]))

                if issparse(mat) and model.get_params().get('method') == 'log':
                    # cannot take log of sparse matrix
                    assert_raises(ValueError, model.fit, mat)
                    continue
                else:
                    model.fit(mat)

                assert_equal(model.rows_.shape, (9, 30))
                assert_equal(model.columns_.shape, (9, 30))
                assert_array_equal(model.rows_.sum(axis=0),
                                   np.repeat(3, 30))
                assert_array_equal(model.columns_.sum(axis=0),
                                   np.repeat(3, 30))
                assert_equal(consensus_score(model.biclusters_,
                                             (rows, cols)), 1)

                _test_shape_indices(model)
Beispiel #15
0
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

data, rows, columns = make_biclusters(shape=(300, 300),
                                      n_clusters=5,
                                      noise=5,
                                      shuffle=False,
                                      random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

print("consensus score: {:.3f}".format(score))

fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

plt.show()
Beispiel #16
0
def jaccard_consensus(a, b):
    '''Get similarity between individual biclusters,
    a and b, calculated using the Jaccard index'''
    score = consensus_score(a, b)
    print("consensus score: {:.3f}".format(score))
Beispiel #17
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:biClusteringL
   Description : 双聚类,对行列同时进行聚类
   Email : [email protected]
   Date:2018/1/1
"""

from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

data, rows, columns = make_biclusters(shape=(300, 300), n_clusters=5, noise=0.5, random_state=0)

model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_, (rows, columns))
print('scores: {}'.format(score))
Beispiel #18
0
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300), n_clusters=n_clusters, noise=10,
    shuffle=False, random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

model = SpectralBiclustering(n_clusters=n_clusters, method='log',
                             random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

print("consensus score: {:.1f}".format(score))

fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

plt.matshow(np.outer(np.sort(model.row_labels_) + 1,
                     np.sort(model.column_labels_) + 1),
            cmap=plt.cm.Blues)
plt.title("Checkerboard structure of rearranged data")

#plt.show()
Beispiel #19
0
def test_co_clustering():

    import numpy as np
    import nibabel as nb
    from matplotlib import pyplot as plt
    import sklearn as sk
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    # REAL DATA
    subject_file = '/Users/aki.nikolaidis/Desktop/NKI_SampleData/A00060280/3mm_bandpassed_demeaned_filtered_antswarp.nii.gz'
    roi_mask_file = home + '/git_repo/basc/masks/BG_3mm.nii.gz'
    roi2_mask_file = home + '/git_repo/basc/masks/yeo2_3mm.nii.gz'

    data = nb.load(subject_file).get_data().astype('float32')
    print('Data Loaded')

    print('Setting up NIS')
    roi_mask_file_nb = nb.load(roi_mask_file)
    roi2_mask_file_nb = nb.load(roi2_mask_file)

    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype(
        'float32').astype('bool')
    roi2_mask_nparray = nb.load(roi2_mask_file).get_data().astype(
        'float32').astype('bool')

    roi1data = data[roi_mask_nparray]
    roi2data = data[roi2_mask_nparray]

    #add code that uploads the roi1data and roi2data, divides by the mean and standard deviation of the timeseries
    roi1data = sk.preprocessing.normalize(roi1data, norm='l2')
    roi2data = sk.preprocessing.normalize(roi2data, norm='l2')

    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[np.isnan(sim_btwn_data_1_2)] = 0
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0

    sim_btwn_data_1_2 = sim_btwn_data_1_2 + (np.random.rand(
        len(sim_btwn_data_1_2), len(sim_btwn_data_1_2[1, :]))) / 100
    sim_btwn_data_1_2[sim_btwn_data_1_2 > 1] = 1

    sum(sum(sim_btwn_data_1_2 == np.inf))
    sum(sum(sim_btwn_data_1_2 == np.nan))

    model = SpectralCoclustering(n_clusters=5, random_state=0, n_init=100)
    model.fit(sim_btwn_data_1_2)

    fit_data = sim_btwn_data_1_2[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    #SIMULATION DATA
    import numpy as np
    from matplotlib import pyplot as plt

    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    #Creating Simulated Data
    data, rows, columns = make_biclusters(shape=(300, 100),
                                          n_clusters=5,
                                          noise=5,
                                          shuffle=False,
                                          random_state=0)

    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Original dataset")

    data, row_idx, col_idx = sg._shuffle(data, random_state=0)
    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Shuffled dataset")

    #Creating Model
    model = SpectralCoclustering(n_clusters=5, random_state=0)
    model.fit(data)
    score = consensus_score(model.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    ####################################################################
    ####################################################################
    from sklearn import cluster
    import scipy as sp
    import time
    from sklearn import cluster, datasets
    import numpy as np
    from matplotlib import pyplot as plt

    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    data1 = generate_simple_blobs(27)
    data2 = generate_simple_blobs(27)
    data2 = data2[0:150, :]

    print("Calculating Cross-clustering")
    print("Calculating pairwise distances between areas")

    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0
    co_cluster = cluster.SpectralCoclustering()
    co_cluster.fit(sim_btwn_data_1_2)
    score = consensus_score(co_cluster.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(co_cluster.row_labels_)]
    fit_data = fit_data[:, np.argsort(co_cluster.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()