コード例 #1
0
ファイル: test_bicluster.py プロジェクト: Aathi410/Pro123
def test_perfect_checkerboard():
    # XXX Previously failed on build bot (not reproducible)
    # One estimator is refit on three noise-free checkerboards of different
    # shapes; every fit must reproduce the generated biclusters exactly.
    estimator = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    for shape in ((30, 30), (40, 30), (30, 40)):
        data, true_rows, true_cols = make_checkerboard(
            shape, 3, noise=0, random_state=0)
        estimator.fit(data)
        found = estimator.biclusters_
        assert consensus_score(found, (true_rows, true_cols)) == 1
コード例 #2
0
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem, for every
    # combination in the parameter grid and for dense and sparse input.
    seed = 0
    grid = {
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1]
    }
    data, true_rows, true_cols = make_biclusters(
        (30, 30), 3, noise=0.5, random_state=seed)
    # The data has to be nonnegative before a sparse copy is made.
    data -= data.min()
    # Zero out every entry below 1.
    data = np.where(data < 1, 0, data)

    inputs = (data, csr_matrix(data))
    for matrix in inputs:
        for params in ParameterGrid(grid):
            estimator = SpectralCoclustering(
                n_clusters=3, random_state=seed, **params)
            estimator.fit(matrix)

            # Each of the 30 rows/columns belongs to exactly one of the
            # 3 biclusters.
            assert_equal(estimator.rows_.shape, (3, 30))
            assert_array_equal(estimator.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(estimator.columns_.sum(axis=0), np.ones(30))
            found = estimator.biclusters_
            assert_equal(consensus_score(found, (true_rows, true_cols)), 1)

            _test_shape_indices(estimator)
コード例 #3
0
ファイル: test_spectral.py プロジェクト: 1oscar/scikit-learn
def test_spectral_coclustering():
    """Test Dhillon's Spectral CoClustering on a simple problem.

    Every combination in the parameter grid is fitted on both the dense
    matrix and its CSR copy, and must recover the generated biclusters.
    """
    seed = 0
    grid = {
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1],
    }
    data, true_rows, true_cols = make_biclusters(
        (30, 30), 3, noise=0.5, random_state=seed)
    # Make the data nonnegative before building the sparse copy.
    data -= data.min()
    # Threshold: entries below 1 become 0.
    data = np.where(data < 1, 0, data)

    dense_and_sparse = (data, csr_matrix(data))
    for matrix in dense_and_sparse:
        for params in ParameterGrid(grid):
            estimator = SpectralCoclustering(
                n_clusters=3, random_state=seed, **params)
            estimator.fit(matrix)

            assert_equal(estimator.rows_.shape, (3, 30))
            assert_array_equal(estimator.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(estimator.columns_.sum(axis=0), np.ones(30))
            achieved = consensus_score(estimator.biclusters_,
                                       (true_rows, true_cols))
            assert_equal(achieved, 1)
コード例 #4
0
def test_perfect_checkerboard():
    # The test is disabled: the skip below is raised unconditionally, so the
    # statements after it never run. They are kept for when it is re-enabled.
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    estimator = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    for shape in ((30, 30), (40, 30), (30, 40)):
        data, true_rows, true_cols = make_checkerboard(
            shape, 3, noise=0, random_state=0)
        estimator.fit(data)
        achieved = consensus_score(estimator.biclusters_,
                                   (true_rows, true_cols))
        assert_equal(achieved, 1)
コード例 #5
0
def test_consensus_score_issue2445():
    """Different number of biclusters in A and B."""
    layout = [
        [True, True, False, False],
        [False, False, True, True],
        [False, False, False, True],
    ]
    a_rows = np.array(layout)
    a_cols = np.array(layout)
    kept = [0, 2]
    b_rows, b_cols = a_rows[kept], a_cols[kept]
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    result = consensus_score((a_rows, a_cols), (b_rows, b_cols))
    assert_almost_equal(result, 2.0 / 3.0)
コード例 #6
0
ファイル: test_bicluster.py プロジェクト: 745698140/test_1
def test_consensus_score_issue2445():
    '''Score two bicluster sets that differ in their number of biclusters.'''
    first = [True, True, False, False]
    second = [False, False, True, True]
    third = [False, False, False, True]
    a_rows = np.array([first, second, third])
    a_cols = np.array([first, second, third])

    subset = [0, 2]
    reference = (a_rows, a_cols)
    partial = (a_rows[subset], a_cols[subset])
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    assert_almost_equal(consensus_score(reference, partial), 2.0 / 3.0)
コード例 #7
0
ファイル: biclustering.py プロジェクト: gitter-badger/PyGMQL
    def bicluster_similarity(self, reference_model):
        """
        Compare the biclusters of the current model with those of a reference model.

        :param reference_model: The reference model; its ``biclusters_`` attribute is read
        :return: The consensus score (Hochreiter et al., 2010) of ``self.model.biclusters_``
                 against ``reference_model.biclusters_``, i.e. the similarity of the two sets of biclusters.
        """
        return consensus_score(self.model.biclusters_,
                               reference_model.biclusters_)
コード例 #8
0
def test_consensus_score():
    # ``b`` holds the same two indicator rows as ``a`` in reverse order.
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]

    # Pairs of bicluster sets expected to score 1.
    scoring_one = [
        ((a, a), (a, a)),
        ((a, a), (b, b)),
        ((a, b), (a, b)),
        ((a, b), (b, a)),
    ]
    # Pairs of bicluster sets expected to score 0.
    scoring_zero = [
        ((a, a), (b, a)),
        ((a, a), (a, b)),
        ((b, b), (a, b)),
        ((b, b), (b, a)),
    ]

    for left, right in scoring_one:
        assert_equal(consensus_score(left, right), 1)

    for left, right in scoring_zero:
        assert_equal(consensus_score(left, right), 0)
コード例 #9
0
def test_consensus_score():
    # Table-driven check: each entry is (first set, second set, expected).
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]  # same indicator rows, reversed order

    expectations = (
        ((a, a), (a, a), 1),
        ((a, a), (b, b), 1),
        ((a, b), (a, b), 1),
        ((a, b), (b, a), 1),
        ((a, a), (b, a), 0),
        ((a, a), (a, b), 0),
        ((b, b), (a, b), 0),
        ((b, b), (b, a), 0),
    )
    for first, second, expected in expectations:
        assert_equal(consensus_score(first, second), expected)
コード例 #10
0
ファイル: test_bicluster.py プロジェクト: 745698140/test_1
def test_consensus_score():
    # ``b`` is ``a`` with its two indicator rows swapped.
    a = [[True, True, False, False], [False, False, True, True]]
    b = a[::-1]

    # Combinations expected to give a score of exactly 1.
    for first, second in [((a, a), (a, a)), ((a, a), (b, b)),
                          ((a, b), (a, b)), ((a, b), (b, a))]:
        assert consensus_score(first, second) == 1

    # Combinations expected to give a score of exactly 0.
    for first, second in [((a, a), (b, a)), ((a, a), (a, b)),
                          ((b, b), (a, b)), ((b, b), (b, a))]:
        assert consensus_score(first, second) == 0
コード例 #11
0
ファイル: test_spectral.py プロジェクト: 1oscar/scikit-learn
def test_perfect_checkerboard():
    # Currently disabled: SkipTest is raised before anything else happens.
    # The unreachable remainder documents what the test checks once it is
    # re-enabled.
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    estimator = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    shapes = [(30, 30), (40, 30), (30, 40)]
    for shape in shapes:
        data, true_rows, true_cols = make_checkerboard(shape, 3, noise=0,
                                                       random_state=0)
        estimator.fit(data)
        truth = (true_rows, true_cols)
        assert_equal(consensus_score(estimator.biclusters_, truth), 1)
コード例 #12
0
ファイル: test_bicluster.py プロジェクト: xcz011/scikit-learn
def test_spectral_biclustering():
    # Test Kluger methods on a checkerboard dataset.
    # Each non-default parameter value is applied on its own to a fresh
    # estimator, for dense and for sparse input.
    data, true_rows, true_cols = make_checkerboard(
        (30, 30), 3, noise=0.5, random_state=0)

    non_default_params = {
        'method': ['scale', 'log'],
        'svd_method': ['arpack'],
        'n_svd_vecs': [20],
        'mini_batch': [True],
    }

    for matrix in (data, csr_matrix(data)):
        for name, candidates in non_default_params.items():
            for value in candidates:
                estimator = SpectralBiclustering(n_clusters=3,
                                                 n_init=3,
                                                 init='k-means++',
                                                 random_state=0)
                estimator.set_params(**{name: value})

                uses_log = estimator.get_params().get('method') == 'log'
                if issparse(matrix) and uses_log:
                    # cannot take log of sparse matrix
                    with pytest.raises(ValueError):
                        estimator.fit(matrix)
                    continue

                estimator.fit(matrix)

                # 9 biclusters over 30 rows/columns, and every row/column
                # is a member of exactly 3 of them.
                assert estimator.rows_.shape == (9, 30)
                assert estimator.columns_.shape == (9, 30)
                assert_array_equal(estimator.rows_.sum(axis=0),
                                   np.full(30, 3))
                assert_array_equal(estimator.columns_.sum(axis=0),
                                   np.full(30, 3))
                truth = (true_rows, true_cols)
                assert consensus_score(estimator.biclusters_, truth) == 1

                _test_shape_indices(estimator)
コード例 #13
0
    def jaccard(self, estimator, train=None):
        """Score the estimator's biclusters against the stored reference
        biclusters using the Jaccard coefficient.

        The Jaccard index achieves its minimum of zero when the biclusters do
        not overlap at all and its maximum of one when they are identical.

        Args:
            estimator (object): The fitted model holding bicluster estimates
                in its ``biclusters_`` attribute.
            train: Ignored.

        Returns:
            (float): The Jaccard coefficient value; ``0.0`` when the
                estimator holds no row or no column biclusters.

        """

        found_rows, found_cols = estimator.biclusters_
        # Nothing was detected, so there is nothing to compare.
        if len(found_rows) == 0 or len(found_cols) == 0:
            return 0.0

        # Reference indicators with their columns reordered by the stored
        # row/column index arrays.
        reference = (self._rows[:, self.row_idx], self._cols[:, self.col_idx])
        return consensus_score(
            (found_rows, found_cols), reference, similarity='jaccard'
        )
コード例 #14
0
def test_spectral_biclustering():
    # Test Kluger methods on a checkerboard dataset.
    data, true_rows, true_cols = make_checkerboard((30, 30), 3, noise=0.5,
                                                   random_state=0)
    truth = (true_rows, true_cols)

    # Values that differ from the estimator defaults; each one is tried in
    # isolation on a freshly constructed estimator.
    non_default_params = {'method': ['scale', 'log'],
                          'svd_method': ['arpack'],
                          'n_svd_vecs': [20],
                          'mini_batch': [True]}

    for matrix in (data, csr_matrix(data)):
        for name, candidates in non_default_params.items():
            for value in candidates:
                estimator = SpectralBiclustering(
                    n_clusters=3,
                    n_init=3,
                    init='k-means++',
                    random_state=0,
                )
                estimator.set_params(**{name: value})

                if (issparse(matrix)
                        and estimator.get_params().get('method') == 'log'):
                    # cannot take log of sparse matrix
                    assert_raises(ValueError, estimator.fit, matrix)
                    continue

                estimator.fit(matrix)

                assert_equal(estimator.rows_.shape, (9, 30))
                assert_equal(estimator.columns_.shape, (9, 30))
                row_membership = estimator.rows_.sum(axis=0)
                col_membership = estimator.columns_.sum(axis=0)
                assert_array_equal(row_membership, np.repeat(3, 30))
                assert_array_equal(col_membership, np.repeat(3, 30))
                assert_equal(
                    consensus_score(estimator.biclusters_, truth), 1)

                _test_shape_indices(estimator)
コード例 #15
0
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

# Demo script: generate a 300x300 matrix with 5 planted biclusters, shuffle
# it, fit SpectralCoclustering and plot each stage.
# NOTE(review): np, plt, make_biclusters and sg are used below but not
# imported in the visible part of this snippet -- confirm they are imported
# elsewhere.
data, rows, columns = make_biclusters(shape=(300, 300),
                                      n_clusters=5,
                                      noise=5,
                                      shuffle=False,
                                      random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# sg._shuffle is an underscore-prefixed (private) helper; presumably it
# returns the shuffled matrix plus the row and column permutations used --
# verify against the installed scikit-learn version.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# NOTE(review): the model asks for 6 clusters while the data was generated
# with 5 -- confirm this mismatch is intentional.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(data)
# The reference indicators are reordered with the same permutations that
# were applied to the data before comparing them with the fitted biclusters.
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

print("consensus score: {:.3f}".format(score))

# Reorder rows, then columns, so members of the same cluster label are
# adjacent in the plot.
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

plt.show()
コード例 #16
0
def jaccard_consensus(a, b):
    '''Print the consensus score between two sets of biclusters.

    ``a`` and ``b`` are passed straight to ``consensus_score`` with its
    default similarity measure (the Jaccard index, per the scikit-learn
    documentation). The score is printed with three decimals; nothing is
    returned.
    '''
    print("consensus score: {:.3f}".format(consensus_score(a, b)))
コード例 #17
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:biClusteringL
   Description : Biclustering -- cluster rows and columns simultaneously
   Email : [email protected]
   Date:2018/1/1
"""

from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

# Generate a 300x300 matrix with 5 planted biclusters, together with the
# row and column indicators of those biclusters.
data, rows, columns = make_biclusters(shape=(300, 300), n_clusters=5, noise=0.5, random_state=0)

# Fit a co-clustering model with the same number of clusters as were planted.
model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
# Compare the fitted biclusters with the generated ones and print the score.
score = consensus_score(model.biclusters_, (rows, columns))
print('scores: {}'.format(score))
コード例 #18
0
# Demo script: generate a 300x300 checkerboard, shuffle it, fit
# SpectralBiclustering and plot each stage.
# NOTE(review): np, plt, sg, make_checkerboard, SpectralBiclustering and
# consensus_score are not imported in the visible part of this snippet --
# confirm they are imported elsewhere.
# presumably (row clusters, column clusters) -- verify against the
# make_checkerboard / SpectralBiclustering documentation.
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300), n_clusters=n_clusters, noise=10,
    shuffle=False, random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# sg._shuffle is an underscore-prefixed (private) helper; presumably it
# returns the shuffled matrix plus the row and column permutations used.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

model = SpectralBiclustering(n_clusters=n_clusters, method='log',
                             random_state=0)
model.fit(data)
# The reference indicators are reordered with the same permutations that
# were applied to the data before scoring.
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

print("consensus score: {:.1f}".format(score))

# Reorder rows, then columns, so entries with the same label are adjacent.
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

# Outer product of the sorted (1-based) row and column labels: every
# (row label, column label) pair gets its own constant block in the plot.
plt.matshow(np.outer(np.sort(model.row_labels_) + 1,
                     np.sort(model.column_labels_) + 1),
            cmap=plt.cm.Blues)
plt.title("Checkerboard structure of rearranged data")

# The figures are never displayed because the show() call is disabled.
#plt.show()
コード例 #19
0
def test_co_clustering():
    """Exploratory walkthrough of spectral co-clustering.

    Three stages run one after another, each ending with ``plt.show()``:

    1. Real data: load a subject image and two ROI masks, build a
       correlation-based similarity matrix between the two ROIs and
       co-cluster it.
    2. Simulated data: generate a matrix with planted biclusters, shuffle
       it, co-cluster it and print the consensus score.
    3. Cross-clustering: co-cluster a similarity matrix with a default
       ``SpectralCoclustering`` and print a consensus score.

    The function takes no arguments and returns nothing. It depends on a
    hard-coded subject file path and on the module-level names ``home`` and
    ``generate_simple_blobs``.
    """
    # All imports live at the top of the function. Previously scipy was
    # imported as ``sp`` only in the last stage, which made ``sp`` a local
    # name and caused the first ``sp.spatial...`` call to raise
    # UnboundLocalError.
    import nibabel as nb
    import numpy as np
    from matplotlib import pyplot as plt
    from scipy.spatial.distance import cdist
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.metrics import consensus_score
    from sklearn.preprocessing import normalize

    # ------------------------------------------------------------------
    # REAL DATA
    # ------------------------------------------------------------------
    # NOTE(review): machine-specific absolute path; the function cannot
    # run on another machine without editing it.
    subject_file = '/Users/aki.nikolaidis/Desktop/NKI_SampleData/A00060280/3mm_bandpassed_demeaned_filtered_antswarp.nii.gz'
    roi_mask_file = home + '/git_repo/basc/masks/BG_3mm.nii.gz'
    roi2_mask_file = home + '/git_repo/basc/masks/yeo2_3mm.nii.gz'

    # NOTE(review): get_data() is reported as deprecated in recent nibabel
    # releases -- confirm against the installed version before upgrading.
    data = nb.load(subject_file).get_data().astype('float32')
    print('Data Loaded')

    print('Setting up NIS')
    # Each mask is loaded once and converted to a boolean array.
    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype(
        'float32').astype('bool')
    roi2_mask_nparray = nb.load(roi2_mask_file).get_data().astype(
        'float32').astype('bool')

    # Boolean indexing keeps only the entries selected by each mask.
    roi1data = data[roi_mask_nparray]
    roi2data = data[roi2_mask_nparray]

    # TODO: divide by the mean and standard deviation of the timeseries;
    # for now each row is only scaled to unit L2 norm.
    roi1data = normalize(roi1data, norm='l2')
    roi2data = normalize(roi2data, norm='l2')

    # Similarity = 1 - correlation distance, clipped into [0, 1].
    dist_btwn_data_1_2 = np.array(
        cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[np.isnan(sim_btwn_data_1_2)] = 0
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0

    # Add a small random jitter (at most 0.01) to every entry, then clip
    # again. Using the array's own shape also works for a one-row matrix.
    sim_btwn_data_1_2 = sim_btwn_data_1_2 + (
        np.random.rand(*sim_btwn_data_1_2.shape)) / 100
    sim_btwn_data_1_2[sim_btwn_data_1_2 > 1] = 1
    # NaNs were zeroed and values clipped above, so the former inf/NaN
    # counts (whose results were discarded, and whose ``== np.nan``
    # comparison could never be true) have been dropped.

    model = SpectralCoclustering(n_clusters=5, random_state=0, n_init=100)
    model.fit(sim_btwn_data_1_2)

    # Reorder rows, then columns, so equal cluster labels are adjacent.
    fit_data = sim_btwn_data_1_2[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    # ------------------------------------------------------------------
    # SIMULATION DATA
    # ------------------------------------------------------------------
    # Creating simulated data with 5 planted biclusters.
    data, rows, columns = make_biclusters(shape=(300, 100),
                                          n_clusters=5,
                                          noise=5,
                                          shuffle=False,
                                          random_state=0)

    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Original dataset")

    # sg._shuffle is an underscore-prefixed (private) helper; presumably it
    # returns the shuffled matrix plus the permutations that were applied.
    data, row_idx, col_idx = sg._shuffle(data, random_state=0)
    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Shuffled dataset")

    # Creating the model and scoring it against the permuted ground truth.
    model = SpectralCoclustering(n_clusters=5, random_state=0)
    model.fit(data)
    score = consensus_score(model.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    # ------------------------------------------------------------------
    # CROSS-CLUSTERING
    # ------------------------------------------------------------------
    # NOTE(review): data1/data2 are generated but never used; the distance
    # below is still computed from the real-data ROIs. Confirm which inputs
    # were intended.
    data1 = generate_simple_blobs(27)
    data2 = generate_simple_blobs(27)
    data2 = data2[0:150, :]

    print("Calculating Cross-clustering")
    print("Calculating pairwise distances between areas")

    # NOTE(review): unlike the real-data stage, NaNs are not zeroed here.
    dist_btwn_data_1_2 = np.array(
        cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0
    co_cluster = SpectralCoclustering()
    co_cluster.fit(sim_btwn_data_1_2)
    # NOTE(review): the reference biclusters and ``data`` used below come
    # from the simulated stage, not from the matrix that was just fitted --
    # verify this comparison and the reordering are meaningful.
    score = consensus_score(co_cluster.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(co_cluster.row_labels_)]
    fit_data = fit_data[:, np.argsort(co_cluster.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()