def compare_models(self, n_clusters, score_func):
    """Grid-search every candidate model and report the best-scoring one.

    The data matrix is shuffled, scaled with ``self.scaler`` and then used to
    run one ``GridSearchCV`` per entry of ``self.models_and_params``.

    Side effects: ``self.row_idx`` and ``self.col_idx`` are overwritten with
    the shuffle indices, and ``self.scaler`` is (re)fitted.

    Args:
        n_clusters (int, tuple or list): Number of clusters handed to each
            model. When a tuple/list is given, only ``SpectralBiclustering``
            receives it unchanged; every other model receives
            ``min(n_clusters)``.
        score_func: Passed as ``scoring`` to ``GridSearchCV``.

    Returns:
        (tuple): ``(model_name, best_params, best_score)`` of the winning
            model. If no model produced a score greater than ``-inf`` (for
            example an empty candidate list), ``(None, None, -inf)`` is
            returned.
    """
    # Shuffle data matrix, row and col indicators for random state.
    _train, self.row_idx, self.col_idx = sgen._shuffle(
        self._data, random_state=self.random_state)
    # Scale the shuffled matrix (original note: subtract mean, divide by std).
    _train_std = self.scaler.fit_transform(_train)

    # Builtin float replaces the removed ``np.float`` alias; same value.
    best_score = float('-inf')
    winner_model, winner_params = None, None

    for model, params in self.models_and_params:
        # Resolve the cluster count per model so that collapsing it for one
        # model never leaks into the following iterations.
        model_n_clusters = n_clusters
        if isinstance(n_clusters, (tuple, list)):
            # ``model`` is an estimator class (it is instantiated below), so
            # the check must be issubclass, not isinstance.
            accepts_pair = (isinstance(model, type)
                            and issubclass(model, SpectralBiclustering))
            if not accepts_pair:
                model_n_clusters = min(n_clusters)

        # Determine the best hyperparameter combo for that model.
        _grid = GridSearchCV(
            model(random_state=self.random_state,
                  n_clusters=model_n_clusters),
            param_grid=params,
            scoring=score_func,
            n_jobs=16,
            cv=self.dummy_cv,
            return_train_score=True,
            refit=False,
        )
        _grid.fit(_train_std, y=None)

        if self.verbose > 1:
            print('Model performance:\nName: {}\nScore: {}\n'
                  .format(model.__name__, _grid.best_score_))

        if _grid.best_score_ > best_score:
            winner_model = model.__name__
            best_score = _grid.best_score_
            winner_params = _grid.best_params_

    return (winner_model, winner_params, best_score)
# Spectral co-clustering demo on a synthetic matrix with planted biclusters.
# NOTE(review): `plt` and `np` are used below but not imported in this
# snippet -- presumably they are already in scope; confirm.
from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

# Generate an unshuffled 300 x 300 matrix containing five biclusters, together
# with the row/column indicator arrays returned by the generator.
data, rows, columns = make_biclusters(
    shape=(300, 300),
    n_clusters=5,
    noise=5,
    shuffle=False,
    random_state=0,
)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Permute rows and columns; the permutations are kept so the indicator
# arrays can be aligned with the shuffled matrix when scoring.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# The model is asked for six clusters although five were generated.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(data)

score = consensus_score(
    model.biclusters_,
    (rows[:, row_idx], columns[:, col_idx]),
)
print("consensus score: {:.3f}".format(score))

# Sort rows, then columns, by their assigned label so that members of the
# same bicluster end up adjacent in the plot.
fit_data = data[np.argsort(model.row_labels_)][:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")
os.remove(temppath) size(W, HEIGHT+dy+40) else: def pltshow(mplpyplot): mplpyplot.show() # nodebox section end n_clusters = (4, 3) data, rows, columns = make_checkerboard( shape=(300, 300), n_clusters=n_clusters, noise=10, shuffle=False, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Original dataset") data, row_idx, col_idx = sg._shuffle(data, random_state=0) plt.matshow(data, cmap=plt.cm.Blues) plt.title("Shuffled dataset") model = SpectralBiclustering(n_clusters=n_clusters, method='log', random_state=0) model.fit(data) score = consensus_score(model.biclusters_, (rows[:, row_idx], columns[:, col_idx])) print("consensus score: {:.1f}".format(score)) fit_data = data[np.argsort(model.row_labels_)] fit_data = fit_data[:, np.argsort(model.column_labels_)] plt.matshow(fit_data, cmap=plt.cm.Blues)
def test_co_clustering():
    """Exploratory spectral co-clustering run on real and simulated data.

    Three stages run in order; each ends in a ``plt.show()`` call:

    1. Real data: load one subject's image and two ROI masks, L2-normalise
       the voxel time series of each ROI, build a correlation-based
       similarity matrix between the two ROIs and co-cluster it.
    2. Simulation: co-cluster a shuffled ``make_biclusters`` matrix and print
       the consensus score against the generated indicators.
    3. Cross-clustering: co-cluster a second similarity matrix with a
       default ``SpectralCoclustering``.

    All imports are done up front. Previously ``scipy`` was imported as
    ``sp`` only before stage 3, which made ``sp`` a function-local name and
    caused the stage-1 distance computation to raise ``UnboundLocalError``.

    The function reads hard-coded file paths and the module-level name
    ``home``; it returns nothing.
    """
    import numpy as np
    import nibabel as nb
    from matplotlib import pyplot as plt
    # Import the submodule functions explicitly instead of relying on
    # ``sk.preprocessing`` / ``sp.spatial`` having been loaded implicitly.
    from scipy.spatial.distance import cdist
    from sklearn.preprocessing import normalize
    from sklearn.datasets import make_biclusters
    from sklearn.datasets import samples_generator as sg
    from sklearn.cluster.bicluster import SpectralCoclustering
    from sklearn.metrics import consensus_score

    def show_matrix(matrix, title):
        # Render one matrix as a blue heat map with the given title.
        plt.matshow(matrix, cmap=plt.cm.Blues)
        plt.title(title)

    def reorder_by_labels(matrix, fitted):
        # Sort rows, then columns, by the cluster label the model assigned.
        by_rows = matrix[np.argsort(fitted.row_labels_)]
        return by_rows[:, np.argsort(fitted.column_labels_)]

    rearranged_title = "After biclustering; rearranged to show biclusters"

    # ------------------------------------------------------------------
    # REAL DATA
    # ------------------------------------------------------------------
    subject_file = '/Users/aki.nikolaidis/Desktop/NKI_SampleData/A00060280/3mm_bandpassed_demeaned_filtered_antswarp.nii.gz'
    roi_mask_file = home + '/git_repo/basc/masks/BG_3mm.nii.gz'
    roi2_mask_file = home + '/git_repo/basc/masks/yeo2_3mm.nii.gz'

    data = nb.load(subject_file).get_data().astype('float32')
    print('Data Loaded')

    print('Setting up NIS')
    # Each mask is loaded once and turned into a boolean selector.
    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype(
        'float32').astype('bool')
    roi2_mask_nparray = nb.load(roi2_mask_file).get_data().astype(
        'float32').astype('bool')

    roi1data = data[roi_mask_nparray]
    roi2data = data[roi2_mask_nparray]

    # TODO (from the original author): divide by the mean and standard
    # deviation of the time series; for now rows are L2-normalised.
    roi1data = normalize(roi1data, norm='l2')
    roi2data = normalize(roi2data, norm='l2')

    # Correlation distance -> similarity, with NaNs and negative values
    # zeroed.
    sim_btwn_data_1_2 = 1 - cdist(roi1data, roi2data, metric='correlation')
    sim_btwn_data_1_2[np.isnan(sim_btwn_data_1_2)] = 0
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0
    # Add uniform noise in [0, 0.01) to every entry, then cap at 1.
    sim_btwn_data_1_2 = (
        sim_btwn_data_1_2 + np.random.rand(*sim_btwn_data_1_2.shape) / 100)
    sim_btwn_data_1_2[sim_btwn_data_1_2 > 1] = 1

    model = SpectralCoclustering(n_clusters=5, random_state=0, n_init=100)
    model.fit(sim_btwn_data_1_2)

    show_matrix(reorder_by_labels(sim_btwn_data_1_2, model), rearranged_title)
    plt.show()

    # ------------------------------------------------------------------
    # SIMULATION DATA
    # ------------------------------------------------------------------
    # Creating simulated data.
    data, rows, columns = make_biclusters(
        shape=(300, 100), n_clusters=5, noise=5, shuffle=False,
        random_state=0)
    show_matrix(data, "Original dataset")

    data, row_idx, col_idx = sg._shuffle(data, random_state=0)
    show_matrix(data, "Shuffled dataset")

    # Creating model.
    model = SpectralCoclustering(n_clusters=5, random_state=0)
    model.fit(data)
    score = consensus_score(model.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))
    print("consensus score: {:.3f}".format(score))

    show_matrix(reorder_by_labels(data, model), rearranged_title)
    plt.show()

    # ------------------------------------------------------------------
    # CROSS-CLUSTERING
    # ------------------------------------------------------------------
    # NOTE(review): `data1`/`data2` are generated but never used; the
    # distances below are still computed from `roi1data`/`roi2data`, the
    # score is taken against the simulation-stage `rows`/`columns`, and the
    # final plot reorders `data` rather than the similarity matrix. This
    # looks like copy-paste residue -- behaviour is kept as-is; confirm the
    # intended inputs before relying on this stage.
    data1 = generate_simple_blobs(27)
    data2 = generate_simple_blobs(27)
    data2 = data2[0:150, :]

    print("Calculating Cross-clustering")
    print("Calculating pairwise distances between areas")
    sim_btwn_data_1_2 = 1 - cdist(roi1data, roi2data, metric='correlation')
    sim_btwn_data_1_2[sim_btwn_data_1_2 < 0] = 0

    co_cluster = SpectralCoclustering()
    co_cluster.fit(sim_btwn_data_1_2)
    score = consensus_score(co_cluster.biclusters_,
                            (rows[:, row_idx], columns[:, col_idx]))
    print("consensus score: {:.3f}".format(score))

    show_matrix(reorder_by_labels(data, co_cluster), rearranged_title)
    plt.show()