def test_make_biclusters():
    """Check make_biclusters output shapes, finiteness and reproducibility."""
    X, rows, cols = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)

    # One indicator vector of length 100 per cluster, for rows and columns.
    # Modernized: plain asserts instead of the deprecated nose-style
    # assert_equal helper (sklearn's test suite moved to bare pytest asserts).
    assert X.shape == (100, 100), "X shape mismatch"
    assert rows.shape == (4, 100), "rows shape mismatch"
    assert cols.shape == (4, 100), "columns shape mismatch"

    # No NaN/inf anywhere in the generated outputs.
    assert_all_finite(X)
    assert_all_finite(rows)
    assert_all_finite(cols)

    # The same random_state must reproduce the exact same matrix.
    X2, _, _ = make_biclusters(shape=(100, 100), n_clusters=4,
                               shuffle=True, random_state=0)
    assert_array_equal(X, X2)
def test_make_biclusters():
    # Generate a shuffled bicluster dataset with a fixed seed.
    data, row_ind, col_ind = make_biclusters(
        shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)

    # Shapes: 100x100 data matrix plus one indicator per cluster.
    assert_equal(data.shape, (100, 100), "X shape mismatch")
    assert_equal(row_ind.shape, (4, 100), "rows shape mismatch")
    assert_equal(col_ind.shape, (4, 100,), "columns shape mismatch")

    # None of the outputs may contain NaN or inf.
    assert_all_finite(data)
    assert_all_finite(row_ind)
    assert_all_finite(col_ind)

    # Fixed random_state implies a reproducible matrix.
    data2, _, _ = make_biclusters(shape=(100, 100), n_clusters=4, shuffle=True,
                                  random_state=0)
    assert_array_almost_equal(data, data2)
def test_spectral_coclustering():
    """Test Dhillon's Spectral CoClustering on a simple problem."""
    param_grid = {
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1],
    }
    seed = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5, random_state=seed)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values

    # Run every parameter combination on both dense and sparse input.
    for matrix in (S, csr_matrix(S)):
        for params in ParameterGrid(param_grid):
            est = SpectralCoclustering(n_clusters=3, random_state=seed,
                                       **params)
            est.fit(matrix)

            assert_equal(est.rows_.shape, (3, 30))
            # Each row and each column belongs to exactly one bicluster.
            assert_array_equal(est.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(est.columns_.sum(axis=0), np.ones(30))
            # Planted biclusters must be recovered perfectly.
            assert_equal(consensus_score(est.biclusters_, (rows, cols)), 1)
def test_n_features_in_(est):
    """n_features_in_ appears only after fit and equals the column count."""
    data, _, _ = make_biclusters((3, 3), 3, random_state=0)
    assert not hasattr(est, "n_features_in_")  # unfitted: attribute absent
    est.fit(data)
    assert est.n_features_in_ == 3
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem.
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    seed = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5, random_state=seed)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values

    # Exercise every parameter combination on dense and sparse input alike.
    for mat in (S, csr_matrix(S)):
        for params in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3, random_state=seed,
                                         **params)
            model.fit(mat)

            assert_equal(model.rows_.shape, (3, 30))
            # Every row / column is assigned to exactly one bicluster.
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            # The planted structure must be recovered exactly.
            assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

            _test_shape_indices(model)
def test_n_jobs_deprecated(klass, n_jobs):
    # FIXME: remove in 0.25
    depr_msg = ("'n_jobs' was deprecated in version 0.23 and will be removed "
                "in 0.25.")
    data, _, _ = make_biclusters((30, 30), 3, noise=0.5, random_state=0)
    model = klass(random_state=0, n_jobs=n_jobs)

    # Fitting with an explicit n_jobs must emit the deprecation warning.
    with pytest.warns(FutureWarning, match=depr_msg):
        model.fit(data)
def sample_generators_datasets(self):
    """Exercise scikit-learn's sample generators and print their output.

    Demonstrates single-label generators (blobs, classification, gaussian
    quantiles) followed by the multi-label and bicluster generators.
    """
    logging.debug('----------------- Sample generators (Single label) -----------')
    blobs = datasets.make_blobs()
    print('blobs for ', blobs)
    print('classification', datasets.make_classification())
    print('gaussian quantiles', datasets.make_gaussian_quantiles())
    print('----------------- Sample generators ( Multilabel) -----------')
    print('multilabel_classification', datasets.make_multilabel_classification())
    print('make_biclusters', datasets.make_biclusters(shape=(300, 300), n_clusters=5))
    # BUG FIX: previously called make_multilabel_classification() again under
    # the 'make_checkerboard' label; actually demonstrate make_checkerboard.
    print('make_checkerboard', datasets.make_checkerboard(shape=(300, 300), n_clusters=5))
def plot_posterior_predictive(ax, X, Z, title, colors, cmap="RdBu_r"):
    # Filled contours of the predictive surface Z over the grid `Xspace`
    # (NOTE(review): Xspace is a module-level global defined elsewhere in
    # this file — confirm it matches Z's shape).
    ax.contourf(*Xspace, Z, cmap=cmap, alpha=0.7, levels=20)
    # Overlay the training points, colored by class.
    ax.scatter(*X.T, c=colors, edgecolors="gray", s=80)
    ax.set_title(title)
    ax.axis("off")
    plt.tight_layout()


# ** Generating training data **
key = random.PRNGKey(314)
n_datapoints, ndims = 50, 2
# Two separable 2-D blobs via make_biclusters; rows[0] marks membership of
# the first bicluster and serves as the binary class label below.
X, rows, cols = make_biclusters((n_datapoints, ndims), 2, noise=0.6,
                                random_state=3141, minval=-4, maxval=4)
y = rows[0] * 1.0  # boolean indicator -> float labels {0.0, 1.0}

alpha = 1.0       # prior precision for the weights
init_noise = 1.0  # variance of the initial weight draw
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]  # Design matrix
ndata, ndims = Phi.shape

# ** MCMC Sampling with BlackJAX **
sigma_mcmc = 0.8
# Initial weight vector drawn from N(0, init_noise * I).
w0 = random.multivariate_normal(key, jnp.zeros(ndims), jnp.eye(ndims) * init_noise)
# Negative log-joint with the data and prior baked in
# (E_base is defined elsewhere in this file).
energy = partial(E_base, Phi=Phi, y=y, alpha=alpha)
initial_state = mh.new_state(w0, energy)
""" print(__doc__) # Author: Kemal Eren <*****@*****.**> # License: BSD 3 clause import numpy as np from matplotlib import pyplot as plt from sklearn.datasets import make_biclusters from sklearn.datasets import samples_generator as sg from sklearn.cluster.bicluster import SpectralCoclustering from sklearn.metrics import consensus_score data, rows, columns = make_biclusters( shape=(300, 300), n_clusters=5, noise=5, shuffle=False, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Original dataset") data, row_idx, col_idx = sg._shuffle(data, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Shuffled dataset") model = SpectralCoclustering(n_clusters=5, random_state=0) model.fit(data) score = consensus_score(model.biclusters_, (rows[:, row_idx], columns[:, col_idx])) print "consensus score: {:.3f}".format(score)
def logjoint_fn(params, data, predict_fn):
    # Unnormalised log-joint: log-likelihood of the data plus log-prior
    # (both helpers are defined elsewhere in this file).
    return loglikelihood_fn(params, *data, predict_fn) + logprior_fn(params)


key = jax.random.PRNGKey(0)

## Data generating process
n_datapoints = 50
m = 2           # input dimensionality
noise = 0.6
bound = 4       # datapoint coordinates lie in [-bound, bound]
# Two blobs from make_biclusters; rows[0] is the first bicluster's
# membership indicator, reused as the binary class label.
X, rows, _ = make_biclusters((n_datapoints, m), 2, noise=noise,
                             random_state=3141, minval=-bound, maxval=bound)
# whether datapoints belong to class 1
y = rows[0] * 1.0
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]  # design matrix with bias column
nfeatures = Phi.shape[-1]

# Model
# LogReg is a flax module defined elsewhere in this file.
model = LogReg()
init_key, key = jax.random.split(key)
variables = model.init(init_key, Phi)

# colors = ["black" if el else "white" for el in y]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name:   biClusteringL
Description: Biclustering — cluster the rows and the columns of a matrix
             simultaneously with Spectral Co-Clustering.
Email:       [email protected]
Date:        2018/1/1
"""
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

# Synthetic 300x300 matrix containing 5 planted biclusters.
matrix, true_rows, true_columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=0.5, random_state=0)

# Fit the co-clustering model and score it against the ground truth.
estimator = SpectralCoclustering(n_clusters=5, random_state=0)
estimator.fit(matrix)
score = consensus_score(estimator.biclusters_, (true_rows, true_columns))
print('scores: {}'.format(score))
def getSKData(style='timeseries', as_dataframe=False, n_samples=10, **kwargs):
    """Generate a synthetic dataset via scikit-learn's sample generators.

    Dispatches on ``style`` to the matching ``make_*`` generator, reading
    every generator parameter from ``kwargs`` with the per-style defaults
    (RegressionArgs, BlobsArgs, ...) as fallbacks.

    NOTE(review): most generators are called with positional arguments, so
    this function is coupled to the exact parameter order of the installed
    scikit-learn version — verify after upgrades.  A ``style`` with no
    branch (including the default 'timeseries') falls through and returns
    None.
    """
    if style == 'regression':
        return make_regression(n_samples,
                               kwargs.get('n_features', RegressionArgs.n_features),
                               kwargs.get('n_informative', RegressionArgs.n_informative),
                               kwargs.get('n_targets', RegressionArgs.n_targets),
                               kwargs.get('bias', RegressionArgs.bias),
                               kwargs.get('effective_rank', RegressionArgs.effective_rank),
                               kwargs.get('tail_strength', RegressionArgs.tail_strength),
                               kwargs.get('noise', RegressionArgs.noise),
                               kwargs.get('shuffle', RegressionArgs.shuffle),
                               kwargs.get('coef', RegressionArgs.coef),
                               kwargs.get('random_state', RegressionArgs.random_state))
    elif style == 'blobs':
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == 'classification':
        return make_classification(n_samples,
                                   kwargs.get('n_features', ClassificationArgs.n_features),
                                   kwargs.get('n_informative', ClassificationArgs.n_informative),
                                   kwargs.get('n_redundant', ClassificationArgs.n_redundant),
                                   kwargs.get('n_repeated', ClassificationArgs.n_repeated),
                                   kwargs.get('n_classes', ClassificationArgs.n_classes),
                                   kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
                                   kwargs.get('weights', ClassificationArgs.weights),
                                   kwargs.get('flip_y', ClassificationArgs.flip_y),
                                   kwargs.get('class_sep', ClassificationArgs.class_sep),
                                   kwargs.get('hypercube', ClassificationArgs.hypercube),
                                   kwargs.get('shift', ClassificationArgs.shift),
                                   kwargs.get('scale', ClassificationArgs.scale),
                                   kwargs.get('shuffle', ClassificationArgs.shuffle),
                                   kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == 'multilabel':
        return make_multilabel_classification(n_samples,
                                              kwargs.get('n_features', MultilabelClassificationArgs.n_features),
                                              kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
                                              kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
                                              kwargs.get('length', MultilabelClassificationArgs.length),
                                              kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
                                              kwargs.get('sparse', MultilabelClassificationArgs.sparse),
                                              kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
                                              kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
                                              kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == 'gaussian':
        # Keyword arguments here, unlike the positional calls above.
        return make_gaussian_quantiles(n_samples=n_samples,
                                       n_features=kwargs.get('n_features', GaussianArgs.n_features),
                                       mean=kwargs.get('mean', GaussianArgs.mean),
                                       cov=kwargs.get('cov', GaussianArgs.cov),
                                       n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
                                       shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
                                       random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == 'hastie':
        return make_hastie_10_2(n_samples,
                                random_state=kwargs.get('random_state', HastieArgs.random_state))
    elif style == 'circles':
        return make_circles(n_samples,
                            kwargs.get('shuffle', CirclesArgs.shuffle),
                            kwargs.get('noise', CirclesArgs.noise),
                            kwargs.get('random_state', CirclesArgs.random_state),
                            kwargs.get('factor', CirclesArgs.factor))
    elif style == 'moons':
        return make_moons(n_samples,
                          kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == 'biclusters':
        # make_biclusters takes a shape instead of n_samples and returns
        # (data, rows, cols).
        x = make_biclusters(kwargs.get('shape', BiclusterArgs.shape),
                            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
                            kwargs.get('noise', BiclusterArgs.noise),
                            kwargs.get('minval', BiclusterArgs.minval),
                            kwargs.get('maxval', BiclusterArgs.maxval),
                            kwargs.get('shuffle', BiclusterArgs.shuffle),
                            kwargs.get('random_state', BiclusterArgs.random_state))
        if as_dataframe:
            # Data matrix concatenated with the transposed row-indicator matrix.
            return pd.concat([pd.DataFrame(x[0]), pd.DataFrame(x[1].T)], axis=1)
        else:
            return x
    elif style == 'scurve':
        return make_s_curve(n_samples,
                            kwargs.get('noise', SCurveArgs.noise),
                            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == 'checker':
        return make_checkerboard(kwargs.get('shape', CheckerArgs.shape),
                                 kwargs.get('n_clusters', CheckerArgs.n_clusters),
                                 kwargs.get('noise', CheckerArgs.noise),
                                 kwargs.get('minval', CheckerArgs.minval),
                                 kwargs.get('maxval', CheckerArgs.maxval),
                                 kwargs.get('shuffle', CheckerArgs.shuffle),
                                 kwargs.get('random_state', CheckerArgs.random_state))
    elif style == 'friedman':
        return make_friedman1(n_samples,
                              kwargs.get('n_features', FriedmanArgs.n_features),
                              kwargs.get('noise', FriedmanArgs.noise),
                              kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == 'friedman2':
        return make_friedman2(n_samples,
                              kwargs.get('noise', Friedman2Args.noise),
                              kwargs.get('random_state', Friedman2Args.random_state))
    elif style == 'friedman3':
        return make_friedman3(n_samples,
                              kwargs.get('noise', Friedman3Args.noise),
                              kwargs.get('random_state', Friedman3Args.random_state))
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# data_prepare
# iris_data = datasets.load_iris()
#
# y_data = iris_data['target'][iris_data['target'] != 2]
# x_data = iris_data['data'][iris_data['target'] != 2, 1:3]
# y_data = np.where(y_data == 0, 1, -1)

# Two 2-D clusters; make_biclusters returns (data, rows, cols).
bicluster_data = datasets.make_biclusters(shape=(100, 2), n_clusters=2, noise=20)
x_data = bicluster_data[0]
# rows[0] is the membership indicator of the first bicluster -> binary labels.
y_data = bicluster_data[1][0]
# Map {True, False} -> {1, -1} (SVM / hinge-loss style targets).
y_data = np.where(y_data, 1, -1)

x_train, x_test, y_train, y_test = \
    train_test_split(
        x_data, y_data, test_size=0.33, random_state=42, shuffle=True)
# Column vectors of float32 for the TF model defined later in the file.
y_train = y_train.astype(np.float32).reshape(-1, 1)
y_test = y_test.astype(np.float32).reshape(-1, 1)
def getSKData(style='timeseries', n_samples=1, **kwargs):
    """Generate a synthetic dataset via scikit-learn's sample generators.

    ``style`` may be a ``Style`` enum member or its lowercase string value;
    strings are coerced with ``Style(style.lower())``.  Each branch reads
    its generator parameters from ``kwargs`` with the per-style defaults
    (RegressionArgs, BlobsArgs, ...) as fallbacks.

    NOTE(review): most generators are called positionally, so this function
    is coupled to the exact parameter order of the installed scikit-learn
    version — verify after upgrades.  A string with no matching enum value
    (including the default 'timeseries' if absent from ``Style``) raises
    ValueError in the coercion; an unmatched enum member returns None.
    """
    if isinstance(style, str):
        style = Style(style.lower())

    if style == Style.REGRESSION:
        return make_regression(
            n_samples,
            kwargs.get('n_features', RegressionArgs.n_features),
            kwargs.get('n_informative', RegressionArgs.n_informative),
            kwargs.get('n_targets', RegressionArgs.n_targets),
            kwargs.get('bias', RegressionArgs.bias),
            kwargs.get('effective_rank', RegressionArgs.effective_rank),
            kwargs.get('tail_strength', RegressionArgs.tail_strength),
            kwargs.get('noise', RegressionArgs.noise),
            kwargs.get('shuffle', RegressionArgs.shuffle),
            kwargs.get('coef', RegressionArgs.coef),
            kwargs.get('random_state', RegressionArgs.random_state))
    elif style == Style.BLOBS:
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == Style.CLASSIFICATION:
        return make_classification(
            n_samples,
            kwargs.get('n_features', ClassificationArgs.n_features),
            kwargs.get('n_informative', ClassificationArgs.n_informative),
            kwargs.get('n_redundant', ClassificationArgs.n_redundant),
            kwargs.get('n_repeated', ClassificationArgs.n_repeated),
            kwargs.get('n_classes', ClassificationArgs.n_classes),
            kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
            kwargs.get('weights', ClassificationArgs.weights),
            kwargs.get('flip_y', ClassificationArgs.flip_y),
            kwargs.get('class_sep', ClassificationArgs.class_sep),
            kwargs.get('hypercube', ClassificationArgs.hypercube),
            kwargs.get('shift', ClassificationArgs.shift),
            kwargs.get('scale', ClassificationArgs.scale),
            kwargs.get('shuffle', ClassificationArgs.shuffle),
            kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == Style.MULTILABEL:
        return make_multilabel_classification(
            n_samples,
            kwargs.get('n_features', MultilabelClassificationArgs.n_features),
            kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
            kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
            kwargs.get('length', MultilabelClassificationArgs.length),
            kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
            kwargs.get('sparse', MultilabelClassificationArgs.sparse),
            kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
            kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
            kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == Style.GAUSSIAN:
        # Keyword arguments here, unlike the positional calls above.
        return make_gaussian_quantiles(
            n_samples=n_samples,
            n_features=kwargs.get('n_features', GaussianArgs.n_features),
            mean=kwargs.get('mean', GaussianArgs.mean),
            cov=kwargs.get('cov', GaussianArgs.cov),
            n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
            shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
            random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == Style.HASTIE:
        return make_hastie_10_2(n_samples, random_state=kwargs.get(
            'random_state', HastieArgs.random_state))
    elif style == Style.CIRCLES:
        return make_circles(
            n_samples,
            kwargs.get('shuffle', CirclesArgs.shuffle),
            kwargs.get('noise', CirclesArgs.noise),
            kwargs.get('random_state', CirclesArgs.random_state),
            kwargs.get('factor', CirclesArgs.factor))
    elif style == Style.MOONS:
        return make_moons(n_samples,
                          kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == Style.BICLUSTERS:
        # make_biclusters takes a shape instead of n_samples.
        return make_biclusters(
            kwargs.get('shape', BiclusterArgs.shape),
            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
            kwargs.get('noise', BiclusterArgs.noise),
            kwargs.get('minval', BiclusterArgs.minval),
            kwargs.get('maxval', BiclusterArgs.maxval),
            kwargs.get('shuffle', BiclusterArgs.shuffle),
            kwargs.get('random_state', BiclusterArgs.random_state))
    elif style == Style.SCURVE:
        return make_s_curve(
            n_samples,
            kwargs.get('noise', SCurveArgs.noise),
            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == Style.CHECKER:
        return make_checkerboard(
            kwargs.get('shape', CheckerArgs.shape),
            kwargs.get('n_clusters', CheckerArgs.n_clusters),
            kwargs.get('noise', CheckerArgs.noise),
            kwargs.get('minval', CheckerArgs.minval),
            kwargs.get('maxval', CheckerArgs.maxval),
            kwargs.get('shuffle', CheckerArgs.shuffle),
            kwargs.get('random_state', CheckerArgs.random_state))
    elif style == Style.FRIEDMAN:
        return make_friedman1(
            n_samples,
            kwargs.get('n_features', FriedmanArgs.n_features),
            kwargs.get('noise', FriedmanArgs.noise),
            kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == Style.FRIEDMAN2:
        return make_friedman2(
            n_samples,
            kwargs.get('noise', Friedman2Args.noise),
            kwargs.get('random_state', Friedman2Args.random_state))
    elif style == Style.FRIEDMAN3:
        return make_friedman3(
            n_samples,
            kwargs.get('noise', Friedman3Args.noise),
            kwargs.get('random_state', Friedman3Args.random_state))
from sklearn import datasets
import matplotlib.pyplot as plt

# Build a small bicluster dataset: 10x10 matrix with 4 planted biclusters,
# constant values in [10, 100], no noise, unshuffled.
matrix, row_ind, col_ind = datasets.make_biclusters(
    (10, 10), 4, noise=0.0, minval=10, maxval=100,
    shuffle=False, random_state=None)

# Dump the data matrix and both indicator arrays.
print(matrix)
print(row_ind)
print(col_ind)

# Visualise the dataset as a heat map.
plt.matshow(matrix)
plt.show()
# BUG FIX: `np` is used below (np.absolute, np.argsort) but numpy was never
# imported in this file's import block.
import numpy as np

from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

import custom_spectral_biclustering as bs

# Experiment parameters.
NB_CLUSTERS = 5
SIZE = 300
NOIZE = 5
K_MEANS_ITERATIONS = 10

#build data
data_init, rows, columns = make_biclusters(
    shape=(SIZE, SIZE), n_clusters=NB_CLUSTERS, noise=NOIZE,
    shuffle=False, random_state=0)
# we dont want negative data
data_init = np.absolute(data_init)

#shuffle rows and columns!
data, row_idx, col_idx = sg._shuffle(data_init, random_state=0)

######### sklearn algorithm #########
model = SpectralCoclustering(n_clusters=NB_CLUSTERS, random_state=0)
model.fit(data)
# Score the fitted biclusters against the (permuted) ground truth.
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))
# Reorder rows so the discovered biclusters are contiguous.
fit_data = data[np.argsort(model.row_labels_)]
def plot_posterior_predictive(ax, X, Z, title, colors, cmap="RdBu_r"):
    # Filled contours of the predictive surface Z over the grid `Xspace`
    # (NOTE(review): Xspace is a module-level global defined elsewhere in
    # this file — confirm it matches Z's shape).
    ax.contourf(*Xspace, Z, cmap=cmap, alpha=0.5, levels=20)
    ax.scatter(*X.T, c=colors)
    ax.set_title(title)
    ax.axis("off")
    plt.tight_layout()


# ** Generating training data **
key = random.PRNGKey(314)
n_datapoints, m = 20, 2
# Two separable 2-D blobs; rows[0] marks membership of the first bicluster
# and serves as the binary class label below.
X, rows, cols = make_biclusters((n_datapoints, m), 2, noise=0.6,
                                random_state=314, minval=-3, maxval=3)
y = rows[0] * 1.0  # boolean indicator -> float labels {0.0, 1.0}

alpha = 1.0       # prior precision for the weights
init_noise = 1.0  # variance of the initial weight draw
Phi = jnp.c_[jnp.ones(n_datapoints)[:, None], X]  # design matrix with bias column
N, M = Phi.shape

# ** MCMC Sampling with BlackJAX **
sigma_mcmc = 0.8
# Initial weight vector drawn from N(0, init_noise * I).
w0 = random.multivariate_normal(key, jnp.zeros(M), jnp.eye(M) * init_noise)
# Negative log-joint with data and prior baked in
# (E_base is defined elsewhere in this file).
E = partial(E_base, Phi=Phi, y=y, alpha=alpha)
initial_state = mh.new_state(w0, E)
def test_co_clustering():
    """Exploratory spectral co-clustering on fMRI ROI time-series, followed
    by a simulated-data sanity check.

    NOTE(review): this is script-like exploratory code.  It depends on
    names not defined in this function (`home`, `sp`, and later
    `generate_simple_blobs`), on machine-specific file paths, and the final
    section reuses `rows`/`row_idx`/`columns`/`col_idx`/`roi1data`/
    `roi2data` from earlier sections — it will not run as-is outside the
    original environment.
    """
    import numpy as np
    import nibabel as nb
    from matplotlib import pyplot as plt
    import sklearn as sk
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    # REAL DATA
    # Machine-specific input paths (see NOTE above).
    subject_file = '/Users/aki.nikolaidis/Desktop/NKI_SampleData/A00060280/3mm_bandpassed_demeaned_filtered_antswarp.nii.gz'
    roi_mask_file = home + '/git_repo/basc/masks/BG_3mm.nii.gz'
    roi2_mask_file = home + '/git_repo/basc/masks/yeo2_3mm.nii.gz'

    data = nb.load(subject_file).get_data().astype('float32')
    print('Data Loaded')

    print('Setting up NIS')
    roi_mask_file_nb = nb.load(roi_mask_file)
    roi2_mask_file_nb = nb.load(roi2_mask_file)

    # Boolean voxel masks for the two ROIs.
    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype(
        'float32').astype('bool')
    roi2_mask_nparray = nb.load(roi2_mask_file).get_data().astype(
        'float32').astype('bool')

    # Voxel time-series within each ROI.
    roi1data = data[roi_mask_nparray]
    roi2data = data[roi2_mask_nparray]

    #add code that uploads the roi1data and roi2data, divides by the mean and standard deviation of the timeseries
    roi1data = sk.preprocessing.normalize(roi1data, norm='l2')
    roi2data = sk.preprocessing.normalize(roi2data, norm='l2')

    # Correlation distance between the two ROIs' voxel time-series,
    # converted to a similarity in [0, 1].
    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    # Clean-up: zero NaNs and negatives, add a small random jitter, clip at 1.
    sim_btwn_data_1_2[np.isnan(sim_btwn_data_1_2)] = 0
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0
    sim_btwn_data_1_2 = sim_btwn_data_1_2 + (np.random.rand(
        len(sim_btwn_data_1_2), len(sim_btwn_data_1_2[1, :]))) / 100
    sim_btwn_data_1_2[sim_btwn_data_1_2 > 1] = 1
    # NOTE(review): results discarded; `== np.nan` is always False anyway.
    sum(sum(sim_btwn_data_1_2 == np.inf))
    sum(sum(sim_btwn_data_1_2 == np.nan))

    model = SpectralCoclustering(n_clusters=5, random_state=0, n_init=100)
    model.fit(sim_btwn_data_1_2)

    # Reorder the similarity matrix so discovered biclusters are contiguous.
    fit_data = sim_btwn_data_1_2[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")
    plt.show()

    #SIMULATION DATA
    import numpy as np
    from matplotlib import pyplot as plt
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    #Creating Simulated Data
    data, rows, columns = make_biclusters(
        shape=(300, 100), n_clusters=5, noise=5, shuffle=False,
        random_state=0)
    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Original dataset")

    # Shuffle rows/columns, keeping the permutations for scoring.
    data, row_idx, col_idx = sg._shuffle(data, random_state=0)
    plt.matshow(data, cmap=plt.cm.Blues)
    plt.title("Shuffled dataset")

    #Creating Model
    model = SpectralCoclustering(n_clusters=5, random_state=0)
    model.fit(data)
    score = consensus_score(model.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))
    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")
    plt.show()

    ####################################################################
    ####################################################################
    from sklearn import cluster
    import scipy as sp
    import time
    from sklearn import cluster, datasets
    import numpy as np
    from matplotlib import pyplot as plt
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    # generate_simple_blobs is defined elsewhere (see NOTE above).
    data1 = generate_simple_blobs(27)
    data2 = generate_simple_blobs(27)
    data2 = data2[0:150, :]

    print("Calculating Cross-clustering")
    print("Calculating pairwise distances between areas")
    # NOTE(review): reuses roi1data/roi2data from the first section, not
    # data1/data2 computed just above — confirm this is intentional.
    dist_btwn_data_1_2 = np.array(
        sp.spatial.distance.cdist(roi1data, roi2data, metric='correlation'))
    sim_btwn_data_1_2 = 1 - dist_btwn_data_1_2
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0

    co_cluster = cluster.SpectralCoclustering()
    co_cluster.fit(sim_btwn_data_1_2)

    score = consensus_score(co_cluster.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))
    print("consensus score: {:.3f}".format(score))

    fit_data = data[np.argsort(co_cluster.row_labels_)]
    fit_data = fit_data[:, np.argsort(co_cluster.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title("After biclustering; rearranged to show biclusters")
    plt.show()
print(__doc__) # Author: Kemal Eren <*****@*****.**> # License: BSD 3 clause import numpy as np from matplotlib import pyplot as plt from sklearn.datasets import make_biclusters from sklearn.datasets import samples_generator as sg from sklearn.cluster.bicluster import SpectralCoclustering from sklearn.metrics import consensus_score data, rows, columns = make_biclusters(shape=(300, 300), n_clusters=5, noise=5, shuffle=False, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Original dataset") data, row_idx, col_idx = sg._shuffle(data, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Shuffled dataset") model = SpectralCoclustering(n_clusters=6, random_state=0) model.fit(data) score = consensus_score(model.biclusters_, (rows[:, row_idx], columns[:, col_idx]))
"""Test for susi.SOMClustering.

Usage:
python -m pytest tests/test_SOMClustering.py

"""
import pytest
import os
import sys
import numpy as np
from sklearn.datasets import make_biclusters

# Make the package under test importable from the repository root.
sys.path.insert(
    0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import susi

# Shared fixture data: 100 samples x 10 features with 3 planted biclusters.
X, _, _ = make_biclusters((100, 10), 3)


@pytest.mark.parametrize("n_rows,n_columns", [
    (10, 10),
    (12, 15),
])
def test_som_clustering_init(n_rows, n_columns):
    # The grid dimensions passed to the constructor must be stored verbatim.
    som_clustering = susi.SOMClustering(
        n_rows=n_rows, n_columns=n_columns)
    assert som_clustering.n_rows == n_rows
    assert som_clustering.n_columns == n_columns


@pytest.mark.parametrize(
    "learning_rate_start,learning_rate_end,max_it,curr_it,mode,expected", [