def test_make_biclusters():
    """make_biclusters yields finite arrays of the requested shapes, reproducibly."""
    X, rows, cols = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_equal(X.shape, (100, 100), "X shape mismatch")
    assert_equal(rows.shape, (4, 100), "rows shape mismatch")
    assert_equal(cols.shape, (4, 100), "columns shape mismatch")
    for arr in (X, rows, cols):
        assert_all_finite(arr)

    # The same seed must reproduce the dataset exactly.
    X2, _, _ = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_array_equal(X, X2)
def test_make_biclusters():
    """Check output shapes, finiteness, and random_state reproducibility."""
    X, rows, cols = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_equal(X.shape, (100, 100), "X shape mismatch")
    assert_equal(rows.shape, (4, 100), "rows shape mismatch")
    assert_equal(cols.shape, (4, 100,), "columns shape mismatch")
    for arr in (X, rows, cols):
        assert_all_finite(arr)

    # Re-running with the same seed must give (almost) the same matrix.
    X2, _, _ = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_array_almost_equal(X, X2)
Exemple #3
0
def test_spectral_coclustering():
    """Test Dhillon's Spectral CoClustering on a simple problem."""
    random_state = 0
    param_grid = {
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1],
    }
    S, rows, cols = make_biclusters(
        (30, 30), 3, noise=0.5, random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(
                n_clusters=3, random_state=random_state, **kwargs)
            model.fit(mat)

            # Every row and every column must land in exactly one bicluster.
            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            # Perfect recovery of the planted biclusters.
            assert_equal(consensus_score(model.biclusters_,
                                         (rows, cols)), 1)
Exemple #4
0
def test_n_features_in_(est):
    """fit() must set n_features_in_ to the number of input columns."""
    X, _, _ = make_biclusters((3, 3), 3, random_state=0)

    assert not hasattr(est, "n_features_in_")  # unset before fitting
    est.fit(X)
    assert est.n_features_in_ == 3
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem.
    random_state = 0
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    S, rows, cols = make_biclusters(
        (30, 30), 3, noise=0.5, random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(
                n_clusters=3, random_state=random_state, **kwargs)
            model.fit(mat)

            # Partition checks: each row/column in exactly one cluster,
            # and the planted structure is recovered perfectly.
            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

            _test_shape_indices(model)
def test_n_jobs_deprecated(klass, n_jobs):
    # FIXME: remove in 0.25
    depr_msg = ("'n_jobs' was deprecated in version 0.23 and will be removed "
                "in 0.25.")
    est = klass(random_state=0, n_jobs=n_jobs)
    S, _, _ = make_biclusters((30, 30), 3, noise=0.5, random_state=0)

    # Passing n_jobs must trigger the deprecation warning during fit.
    with pytest.warns(FutureWarning, match=depr_msg):
        est.fit(S)
    def sample_generators_datasets(self):
        """Print the output of several sklearn sample generators.

        Exercises single-label and multilabel dataset generators from
        ``sklearn.datasets`` and prints each result for inspection.
        """

        logging.debug('----------------- Sample generators (Single label) -----------')

        blobs = datasets.make_blobs()
        print('blobs  for ' , blobs)

        print('classification' , datasets.make_classification())
        print('gaussian quantiles' , datasets.make_gaussian_quantiles())

        print('----------------- Sample generators ( Multilabel) -----------')

        print('multilabel_classification' , datasets.make_multilabel_classification())
        print('make_biclusters' , datasets.make_biclusters(shape=(300, 300) , n_clusters=5))
        # BUG FIX: this line previously called make_multilabel_classification()
        # again; the label says make_checkerboard, so call the checkerboard
        # generator (which requires shape and n_clusters).
        print('make_checkerboard' , datasets.make_checkerboard(shape=(300, 300), n_clusters=5))
Exemple #8
0

def plot_posterior_predictive(ax, X, Z, title, colors, cmap="RdBu_r"):
    """Draw predictive contours plus the training points on *ax*.

    NOTE: reads the grid `Xspace` from the enclosing module scope.
    """
    ax.contourf(*Xspace, Z, cmap=cmap, alpha=0.7, levels=20)
    ax.scatter(*X.T, c=colors, edgecolors="gray", s=80)
    ax.axis("off")
    ax.set_title(title)
    plt.tight_layout()


# ** Generating training data **
key = random.PRNGKey(314)
n_datapoints, ndims = 50, 2
# Two planted clusters; rows[0] is the boolean membership of cluster 0.
X, rows, cols = make_biclusters((n_datapoints, ndims),
                                2,
                                noise=0.6,
                                random_state=3141,
                                minval=-4,
                                maxval=4)
y = rows[0] * 1.0  # binary class labels as floats

alpha = 1.0       # presumably the prior precision -- confirm in E_base
init_noise = 1.0  # variance of the initial weight draw
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]  # Design matrix
# NOTE: rebinds ndims from 2 (raw features) to 3 (bias column added).
ndata, ndims = Phi.shape

# ** MCMC Sampling with BlackJAX **
sigma_mcmc = 0.8  # proposal scale for Metropolis-Hastings
w0 = random.multivariate_normal(key, jnp.zeros(ndims),
                                jnp.eye(ndims) * init_noise)
# E_base is defined elsewhere; presumably the negative log-posterior.
energy = partial(E_base, Phi=Phi, y=y, alpha=alpha)
initial_state = mh.new_state(w0, energy)
"""
print(__doc__)

# Author: Kemal Eren <*****@*****.**>
# License: BSD 3 clause

import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

# Block-structured dataset with 5 planted biclusters (unshuffled).
data, rows, columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=5,
    shuffle=False, random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Shuffle rows/columns so the model has structure to recover.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

# BUG FIX: the original used a Python 2 print statement, which is a
# SyntaxError under Python 3.
print("consensus score: {:.3f}".format(score))
Exemple #10
0

def logjoint_fn(params, data, predict_fn):
    """Unnormalized log-joint: log-likelihood of *data* plus log-prior."""
    loglik = loglikelihood_fn(params, *data, predict_fn)
    return loglik + logprior_fn(params)


key = jax.random.PRNGKey(0)

## Data generating process
n_datapoints = 50
m = 2          # number of raw input features
noise = 0.6
bound = 4      # features drawn in [-bound, bound]
X, rows, _ = make_biclusters((n_datapoints, m),
                             2,
                             noise=noise,
                             random_state=3141,
                             minval=-bound,
                             maxval=bound)

# whether datapoints belong to class 1
y = rows[0] * 1.0

# Design matrix with a leading bias column of ones.
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]
nfeatures = Phi.shape[-1]

# Model
model = LogReg()  # defined elsewhere in this module
init_key, key = jax.random.split(key)
# flax-style parameter initialization -- TODO confirm LogReg is a flax Module
variables = model.init(init_key, Phi)

# colors = ["black" if el else "white" for el in y]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name: biClusteringL
   Description: Biclustering -- cluster the rows and the columns of a
       matrix simultaneously.
   Email : [email protected]
   Date: 2018/1/1
"""

from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

# 300x300 matrix with 5 planted biclusters (data is not shuffled here).
data, rows, columns = make_biclusters(shape=(300, 300), n_clusters=5, noise=0.5, random_state=0)

model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
# 1.0 means the recovered biclusters match the planted ones exactly.
score = consensus_score(model.biclusters_, (rows, columns))
print('scores: {}'.format(score))
def getSKData(style='timeseries', as_dataframe=False, n_samples=10, **kwargs):
    """Build a synthetic sklearn dataset for the requested *style*.

    Parameters
    ----------
    style : str
        One of 'regression', 'blobs', 'classification', 'multilabel',
        'gaussian', 'hastie', 'circles', 'moons', 'biclusters',
        'scurve', 'checker', 'friedman', 'friedman2', 'friedman3'.
        Per-generator keyword arguments can be overridden via kwargs;
        otherwise the matching *Args default holder (defined elsewhere
        in this module) supplies the value.
    as_dataframe : bool
        Only honoured by the 'biclusters' branch: concatenate the data
        matrix and the transposed row-label matrix into one DataFrame.
    n_samples : int
        Sample count for the sample-based generators.

    NOTE(review): the default style 'timeseries' matches no branch, so
    the function then returns None implicitly -- confirm callers handle
    that. Arguments are passed positionally, so this is tied to the
    positional signatures of the sklearn generators in use.
    """
    if style == 'regression':
        return make_regression(n_samples,
                               kwargs.get('n_features', RegressionArgs.n_features),
                               kwargs.get('n_informative', RegressionArgs.n_informative),
                               kwargs.get('n_targets', RegressionArgs.n_targets),
                               kwargs.get('bias', RegressionArgs.bias),
                               kwargs.get('effective_rank', RegressionArgs.effective_rank),
                               kwargs.get('tail_strength', RegressionArgs.tail_strength),
                               kwargs.get('noise', RegressionArgs.noise),
                               kwargs.get('shuffle', RegressionArgs.shuffle),
                               kwargs.get('coef', RegressionArgs.coef),
                               kwargs.get('random_state', RegressionArgs.random_state))
    elif style == 'blobs':
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == 'classification':
        return make_classification(n_samples,
                                   kwargs.get('n_features', ClassificationArgs.n_features),
                                   kwargs.get('n_informative', ClassificationArgs.n_informative),
                                   kwargs.get('n_redundant', ClassificationArgs.n_redundant),
                                   kwargs.get('n_repeated', ClassificationArgs.n_repeated),
                                   kwargs.get('n_classes', ClassificationArgs.n_classes),
                                   kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
                                   kwargs.get('weights', ClassificationArgs.weights),
                                   kwargs.get('flip_y', ClassificationArgs.flip_y),
                                   kwargs.get('class_sep', ClassificationArgs.class_sep),
                                   kwargs.get('hypercube', ClassificationArgs.hypercube),
                                   kwargs.get('shift', ClassificationArgs.shift),
                                   kwargs.get('scale', ClassificationArgs.scale),
                                   kwargs.get('shuffle', ClassificationArgs.shuffle),
                                   kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == 'multilabel':
        return make_multilabel_classification(n_samples,
                                              kwargs.get('n_features', MultilabelClassificationArgs.n_features),
                                              kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
                                              kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
                                              kwargs.get('length', MultilabelClassificationArgs.length),
                                              kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
                                              kwargs.get('sparse', MultilabelClassificationArgs.sparse),
                                              kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
                                              kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
                                              kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == 'gaussian':
        return make_gaussian_quantiles(n_samples=n_samples,
                                       n_features=kwargs.get('n_features', GaussianArgs.n_features),
                                       mean=kwargs.get('mean', GaussianArgs.mean),
                                       cov=kwargs.get('cov', GaussianArgs.cov),
                                       n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
                                       shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
                                       random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == 'hastie':
        return make_hastie_10_2(n_samples,
                                random_state=kwargs.get('random_state', HastieArgs.random_state))
    elif style == 'circles':
        return make_circles(n_samples,
                            kwargs.get('shuffle', CirclesArgs.shuffle),
                            kwargs.get('noise', CirclesArgs.noise),
                            kwargs.get('random_state', CirclesArgs.random_state),
                            kwargs.get('factor', CirclesArgs.factor))
    elif style == 'moons':
        return make_moons(n_samples,
                          kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == 'biclusters':
        x = make_biclusters(kwargs.get('shape', BiclusterArgs.shape),
                            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
                            kwargs.get('noise', BiclusterArgs.noise),
                            kwargs.get('minval', BiclusterArgs.minval),
                            kwargs.get('maxval', BiclusterArgs.maxval),
                            kwargs.get('shuffle', BiclusterArgs.shuffle),
                            kwargs.get('random_state', BiclusterArgs.random_state))
        if as_dataframe:
            # Data matrix plus one indicator column per bicluster.
            return pd.concat([pd.DataFrame(x[0]), pd.DataFrame(x[1].T)], axis=1)
        else:
            return x

    elif style == 'scurve':
        return make_s_curve(n_samples,
                            kwargs.get('noise', SCurveArgs.noise),
                            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == 'checker':
        return make_checkerboard(kwargs.get('shape', CheckerArgs.shape),
                                 kwargs.get('n_clusters', CheckerArgs.n_clusters),
                                 kwargs.get('noise', CheckerArgs.noise),
                                 kwargs.get('minval', CheckerArgs.minval),
                                 kwargs.get('maxval', CheckerArgs.maxval),
                                 kwargs.get('shuffle', CheckerArgs.shuffle),
                                 kwargs.get('random_state', CheckerArgs.random_state))
    elif style == 'friedman':
        return make_friedman1(n_samples,
                              kwargs.get('n_features', FriedmanArgs.n_features),
                              kwargs.get('noise', FriedmanArgs.noise),
                              kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == 'friedman2':
        return make_friedman2(n_samples,
                              kwargs.get('noise', Friedman2Args.noise),
                              kwargs.get('random_state', Friedman2Args.random_state))
    elif style == 'friedman3':
        return make_friedman3(n_samples,
                              kwargs.get('noise', Friedman3Args.noise),
                              kwargs.get('random_state', Friedman3Args.random_state))
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# data_prepare
# iris_data = datasets.load_iris()
#
# y_data = iris_data['target'][iris_data['target'] != 2]
# x_data = iris_data['data'][iris_data['target'] != 2, 1:3]
# y_data = np.where(y_data == 0, 1, -1)

# Synthetic two-cluster data; bicluster_data[1][0] is the boolean
# membership of the first cluster.
# NOTE(review): no random_state is passed, so the dataset differs
# between runs -- confirm that is intended.
bicluster_data = datasets.make_biclusters(shape=(100, 2),
                                          n_clusters=2,
                                          noise=20)

x_data = bicluster_data[0]
y_data = bicluster_data[1][0]
# Map {True, False} membership to SVM-style {+1, -1} labels.
y_data = np.where(y_data, 1, -1)

x_train, x_test, y_train, y_test = \
    train_test_split(
        x_data,
        y_data,
        test_size=0.33,
        random_state=42,
        shuffle=True)

# Column vectors of float32 for the TensorFlow model below.
y_train = y_train.astype(np.float32).reshape(-1, 1)
y_test = y_test.astype(np.float32).reshape(-1, 1)
def getSKData(style='timeseries', n_samples=1, **kwargs):
    """Build a synthetic sklearn dataset for the requested *style*.

    Accepts either a ``Style`` enum member or its lowercase string name
    (coerced via ``Style(style.lower())``).  Per-generator keyword
    overrides come from kwargs; otherwise the matching *Args default
    holder (defined elsewhere in this module) supplies the value.

    NOTE(review): the default 'timeseries' string is passed to Style();
    if the enum has no such member this raises ValueError, and if it
    does, no branch below matches and None is returned -- confirm which
    is intended.
    """
    if isinstance(style, str):
        style = Style(style.lower())
    if style == Style.REGRESSION:
        return make_regression(
            n_samples, kwargs.get('n_features', RegressionArgs.n_features),
            kwargs.get('n_informative', RegressionArgs.n_informative),
            kwargs.get('n_targets', RegressionArgs.n_targets),
            kwargs.get('bias', RegressionArgs.bias),
            kwargs.get('effective_rank', RegressionArgs.effective_rank),
            kwargs.get('tail_strength', RegressionArgs.tail_strength),
            kwargs.get('noise', RegressionArgs.noise),
            kwargs.get('shuffle', RegressionArgs.shuffle),
            kwargs.get('coef', RegressionArgs.coef),
            kwargs.get('random_state', RegressionArgs.random_state))
    elif style == Style.BLOBS:
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == Style.CLASSIFICATION:
        return make_classification(
            n_samples, kwargs.get('n_features', ClassificationArgs.n_features),
            kwargs.get('n_informative', ClassificationArgs.n_informative),
            kwargs.get('n_redundant', ClassificationArgs.n_redundant),
            kwargs.get('n_repeated', ClassificationArgs.n_repeated),
            kwargs.get('n_classes', ClassificationArgs.n_classes),
            kwargs.get('n_clusters_per_class',
                       ClassificationArgs.n_clusters_per_class),
            kwargs.get('weights', ClassificationArgs.weights),
            kwargs.get('flip_y', ClassificationArgs.flip_y),
            kwargs.get('class_sep', ClassificationArgs.class_sep),
            kwargs.get('hypercube', ClassificationArgs.hypercube),
            kwargs.get('shift', ClassificationArgs.shift),
            kwargs.get('scale', ClassificationArgs.scale),
            kwargs.get('shuffle', ClassificationArgs.shuffle),
            kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == Style.MULTILABEL:
        return make_multilabel_classification(
            n_samples,
            kwargs.get('n_features', MultilabelClassificationArgs.n_features),
            kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
            kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
            kwargs.get('length', MultilabelClassificationArgs.length),
            kwargs.get('allow_unlabeled',
                       MultilabelClassificationArgs.allow_unlabeled),
            kwargs.get('sparse', MultilabelClassificationArgs.sparse),
            kwargs.get('return_indicator',
                       MultilabelClassificationArgs.return_indicator),
            kwargs.get('return_distributions',
                       MultilabelClassificationArgs.return_distributions),
            kwargs.get('random_state',
                       MultilabelClassificationArgs.random_state))
    elif style == Style.GAUSSIAN:
        return make_gaussian_quantiles(
            n_samples=n_samples,
            n_features=kwargs.get('n_features', GaussianArgs.n_features),
            mean=kwargs.get('mean', GaussianArgs.mean),
            cov=kwargs.get('cov', GaussianArgs.cov),
            n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
            shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
            random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == Style.HASTIE:
        return make_hastie_10_2(n_samples,
                                random_state=kwargs.get(
                                    'random_state', HastieArgs.random_state))
    elif style == Style.CIRCLES:
        return make_circles(
            n_samples, kwargs.get('shuffle', CirclesArgs.shuffle),
            kwargs.get('noise', CirclesArgs.noise),
            kwargs.get('random_state', CirclesArgs.random_state),
            kwargs.get('factor', CirclesArgs.factor))
    elif style == Style.MOONS:
        return make_moons(n_samples, kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == Style.BICLUSTERS:
        return make_biclusters(
            kwargs.get('shape', BiclusterArgs.shape),
            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
            kwargs.get('noise', BiclusterArgs.noise),
            kwargs.get('minval', BiclusterArgs.minval),
            kwargs.get('maxval', BiclusterArgs.maxval),
            kwargs.get('shuffle', BiclusterArgs.shuffle),
            kwargs.get('random_state', BiclusterArgs.random_state))
    elif style == Style.SCURVE:
        return make_s_curve(
            n_samples, kwargs.get('noise', SCurveArgs.noise),
            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == Style.CHECKER:
        return make_checkerboard(
            kwargs.get('shape', CheckerArgs.shape),
            kwargs.get('n_clusters', CheckerArgs.n_clusters),
            kwargs.get('noise', CheckerArgs.noise),
            kwargs.get('minval', CheckerArgs.minval),
            kwargs.get('maxval', CheckerArgs.maxval),
            kwargs.get('shuffle', CheckerArgs.shuffle),
            kwargs.get('random_state', CheckerArgs.random_state))
    elif style == Style.FRIEDMAN:
        return make_friedman1(
            n_samples, kwargs.get('n_features', FriedmanArgs.n_features),
            kwargs.get('noise', FriedmanArgs.noise),
            kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == Style.FRIEDMAN2:
        return make_friedman2(
            n_samples, kwargs.get('noise', Friedman2Args.noise),
            kwargs.get('random_state', Friedman2Args.random_state))
    elif style == Style.FRIEDMAN3:
        return make_friedman3(
            n_samples, kwargs.get('noise', Friedman3Args.noise),
            kwargs.get('random_state', Friedman3Args.random_state))
Exemple #15
0
from sklearn import datasets
import matplotlib.pyplot as plt

# Generate a small 10x10 dataset containing 4 planted biclusters.
X, rows, cols = datasets.make_biclusters(
    (10, 10), 4, noise=0.0, minval=10, maxval=100,
    shuffle=False, random_state=None)
for arr in (X, rows, cols):
    print(arr)

# Visualise the raw data matrix.
plt.matshow(X)
plt.show()
Exemple #16
0
from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

import custom_spectral_biclustering as bs

# Experiment parameters.
NB_CLUSTERS = 5
SIZE = 300
NOIZE = 5
K_MEANS_ITERATIONS = 10

#build data
data_init, rows, columns = make_biclusters(shape=(SIZE, SIZE),
                                           n_clusters=NB_CLUSTERS,
                                           noise=NOIZE,
                                           shuffle=False,
                                           random_state=0)

# we dont want negative data
# NOTE(review): `np` is used here but not imported in this snippet --
# presumably `import numpy as np` lives above this chunk; verify.
data_init = np.absolute(data_init)

#shuffle rows and columns!
data, row_idx, col_idx = sg._shuffle(data_init, random_state=0)

######### sklearn algorithm #########
model = SpectralCoclustering(n_clusters=NB_CLUSTERS, random_state=0)
model.fit(data)
# consensus_score of 1.0 means the planted biclusters were recovered.
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))
# Reorder rows by cluster label to make the block structure visible.
fit_data = data[np.argsort(model.row_labels_)]
Exemple #17
0

def plot_posterior_predictive(ax, X, Z, title, colors, cmap="RdBu_r"):
    """Plot predictive contours plus the data points on *ax*.

    NOTE: reads the grid `Xspace` from the enclosing module scope.
    """
    ax.contourf(*Xspace, Z, cmap=cmap, alpha=0.5, levels=20)
    ax.scatter(*X.T, c=colors)
    ax.axis("off")
    ax.set_title(title)
    plt.tight_layout()


# ** Generating training data **
key = random.PRNGKey(314)
n_datapoints, m = 20, 2
# Two planted clusters; rows[0] marks membership of the first one.
X, rows, cols = make_biclusters((n_datapoints, m),
                                2,
                                noise=0.6,
                                random_state=314,
                                minval=-3,
                                maxval=3)
y = rows[0] * 1.0  # binary class labels as floats

alpha = 1.0       # presumably the prior precision -- confirm in E_base
init_noise = 1.0  # variance of the initial weight draw
# Design matrix with a leading bias column of ones.
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]
N, M = Phi.shape

# ** MCMC Sampling with BlackJAX **
sigma_mcmc = 0.8  # proposal scale for Metropolis-Hastings
w0 = random.multivariate_normal(key, jnp.zeros(M), jnp.eye(M) * init_noise)
# E_base is defined elsewhere; presumably the negative log-posterior.
E = partial(E_base, Phi=Phi, y=y, alpha=alpha)
initial_state = mh.new_state(w0, E)
Exemple #18
0
def test_co_clustering():
    """Exploratory spectral co-clustering on real fMRI data and simulations.

    NOTE(review): despite its name this is a demo script, not a unit
    test -- it reads hard-coded local file paths and opens plots.  The
    names `home`, `sp` (first use precedes `import scipy as sp` below)
    and `generate_simple_blobs` are not defined in this snippet;
    presumably supplied elsewhere in the module.
    """

    import numpy as np
    import nibabel as nb
    from matplotlib import pyplot as plt
    import sklearn as sk
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    # REAL DATA
    subject_file = '/Users/aki.nikolaidis/Desktop/NKI_SampleData/A00060280/3mm_bandpassed_demeaned_filtered_antswarp.nii.gz'
    roi_mask_file = home + '/git_repo/basc/masks/BG_3mm.nii.gz'
    roi2_mask_file = home + '/git_repo/basc/masks/yeo2_3mm.nii.gz'

    data = nb.load(subject_file).get_data().astype('float32')
    print('Data Loaded')

    print('Setting up NIS')
    roi_mask_file_nb = nb.load(roi_mask_file)
    roi2_mask_file_nb = nb.load(roi2_mask_file)

    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype(
        'float32').astype('bool')
    roi2_mask_nparray = nb.load(roi2_mask_file).get_data().astype(
        'float32').astype('bool')

    # Voxel time series inside each ROI mask.
    roi1data = data[roi_mask_nparray]
    roi2data = data[roi2_mask_nparray]

    #add code that uploads the roi1data and roi2data, divides by the mean and standard deviation of the timeseries
    roi1data = sk.preprocessing.normalize(roi1data, norm='l2')
    roi2data = sk.preprocessing.normalize(roi2data, norm='l2')

    # Correlation similarity between the two ROIs, clipped to [0, 1].
    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[np.isnan(sim_btwn_data_1_2)] = 0
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0

    # Add small random jitter, then re-clip the upper bound.
    sim_btwn_data_1_2 = sim_btwn_data_1_2 + (np.random.rand(
        len(sim_btwn_data_1_2), len(sim_btwn_data_1_2[1, :]))) / 100
    sim_btwn_data_1_2[sim_btwn_data_1_2 > 1] = 1

    # NOTE(review): these results are discarded, and `== np.nan` is
    # always False -- probably np.isinf/np.isnan with an assert was meant.
    sum(sum(sim_btwn_data_1_2 == np.inf))
    sum(sum(sim_btwn_data_1_2 == np.nan))

    model = SpectralCoclustering(n_clusters=5, random_state=0, n_init=100)
    model.fit(sim_btwn_data_1_2)

    # Reorder rows/columns by cluster label to expose block structure.
    fit_data = sim_btwn_data_1_2[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    #SIMULATION DATA
    import numpy as np
    from matplotlib import pyplot as plt

    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    #Creating Simulated Data
    data, rows, columns = make_biclusters(shape=(300, 100),
                                          n_clusters=5,
                                          noise=5,
                                          shuffle=False,
                                          random_state=0)

    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Original dataset")

    data, row_idx, col_idx = sg._shuffle(data, random_state=0)
    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Shuffled dataset")

    #Creating Model
    model = SpectralCoclustering(n_clusters=5, random_state=0)
    model.fit(data)
    score = consensus_score(model.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()

    ####################################################################
    ####################################################################
    from sklearn import cluster
    import scipy as sp
    import time
    from sklearn import cluster, datasets
    import numpy as np
    from matplotlib import pyplot as plt

    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    data1 = generate_simple_blobs(27)
    data2 = generate_simple_blobs(27)
    data2 = data2[0:150, :]

    print("Calculating Cross-clustering")
    print("Calculating pairwise distances between areas")

    # NOTE(review): this section reuses roi1data/roi2data and, below,
    # rows/row_idx/col_idx/data from the earlier sections instead of
    # data1/data2 -- looks like a copy-paste bug; verify intent.
    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0
    co_cluster = cluster.SpectralCoclustering()
    co_cluster.fit(sim_btwn_data_1_2)
    score = consensus_score(co_cluster.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))

    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(co_cluster.row_labels_)]
    fit_data = fit_data[:, np.argsort(co_cluster.column_labels_)]

    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")

    plt.show()
Exemple #19
0
print(__doc__)

# Author: Kemal Eren <*****@*****.**>
# License: BSD 3 clause

import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

# Block-structured data with 5 planted biclusters (unshuffled).
data, rows, columns = make_biclusters(shape=(300, 300),
                                      n_clusters=5,
                                      noise=5,
                                      shuffle=False,
                                      random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Shuffle rows/columns so the model has structure to recover.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# NOTE(review): n_clusters=6 here, but the data was generated with 5 --
# possibly a deliberate mis-specified-k demo, possibly a typo; confirm.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))
"""Test for susi.SOMClustering.

Usage:
python -m pytest tests/test_SOMClustering.py

"""
import pytest
import os
import sys
import numpy as np
from sklearn.datasets import make_biclusters
sys.path.insert(
    0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import susi

# Shared test fixture: 100x10 data matrix with 3 biclusters
# (the row/column label arrays are discarded).
X, _, _ = make_biclusters((100, 10), 3)


@pytest.mark.parametrize("n_rows,n_columns", [
    (10, 10),
    (12, 15),
])
def test_som_clustering_init(n_rows, n_columns):
    """The constructor must store the requested SOM grid dimensions."""
    som = susi.SOMClustering(n_rows=n_rows, n_columns=n_columns)
    assert som.n_rows == n_rows
    assert som.n_columns == n_columns


@pytest.mark.parametrize(
    "learning_rate_start,learning_rate_end,max_it,curr_it,mode,expected", [