def cluster_ward(roi, reference, i):
    """Run Ward clustering on the precomputed distances of bootstrap ``i``.

    Loads the raw and the scaled distance matrices stored for bootstrap
    iteration ``i``, passes the transpose of each to ``ward`` and dumps the
    two results into the same directory the inputs were read from.

    ``roi`` and ``reference`` are kept in the signature for compatibility
    but are not used: the code that resampled them and computed the
    distances inline is disabled (preserved below as a comment) and the
    matrices are read from disk instead.

    If a load or a dump raises ``IOError`` the iteration is skipped, as
    before, but a warning is now emitted so that a missing input or a
    failed write no longer goes unnoticed.

    @param roi: unused (see above)
    @param reference: unused (see above)
    @param i: bootstrap index, substituted into the file names with ``%d``
    @return: None
    """
    import warnings

    # Disabled code path that originally produced (and saved) the matrices
    # that are loaded below:
    #   from numpy.random import seed
    #   seed(i)
    #   X, Y = resample(roi.T, reference.T)
    #   print "Computing roi ref distances..."
    #   distances = pairwise_distances(X.T, Y.T, metric='correlation')
    #   scaled_distances = scale(distances, axis=1)
    #   dump(distances, <same path as the first load below>)
    #   dump(scaled_distances, <same path as the second load below>)
    try:
        distances = load(
            '/projects/delavega/clustering/results/bootstrap/hierarchical/whole_brain_PCA_dist_min100_b%d.pkl' % i)
        scaled_distances = load(
            '/projects/delavega/clustering/results/bootstrap/hierarchical/whole_brain_PCA_dist_min100_scaled_b%d.pkl' % i)

        Z = ward(distances.T)
        Z_scaled = ward(scaled_distances.T)

        dump(
            Z,
            '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_b%d.pkl' % i)
        dump(
            Z_scaled,
            '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_scaled_b%d.pkl' % i)
    except IOError as err:
        # Still best-effort (no exception escapes), but report the skip
        # instead of swallowing it silently.
        warnings.warn(
            "cluster_ward: skipping bootstrap %d: %s" % (i, err))
def cluster_ward(distances, scaled_distances):
    """Run ``ward`` on the raw and the scaled distances and save both results.

    Both linkages are computed first; only then are they written, the raw
    one to ``Z_ward_wholebrain_full.pkl`` and the scaled one to
    ``Z_ward_wholebrain_scaled_full.pkl``.

    @param distances: input passed unchanged to ``ward``
    @param scaled_distances: input passed unchanged to ``ward``
    @return: None
    """
    linkages = [ward(matrix) for matrix in (distances, scaled_distances)]
    targets = (
        '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_full.pkl',
        '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_scaled_full.pkl',
    )
    for linkage, target in zip(linkages, targets):
        dump(linkage, target)
def clustering(images, metric="euclidean", t=1.15):
    """Cluster a stack of images with PCA followed by Ward linkage.

    Each image is flattened (axes 1 and 2 are merged), the flattened data
    is reduced to 100 PCA components, pairwise distances between the
    reduced rows are computed with ``pdist`` and fed to ``ward``, and the
    resulting linkage is cut with ``fcluster``.

    @param images: array indexed as (image, axis1, axis2); only the first
        three entries of ``images.shape`` are read
    @param metric: distance metric forwarded to ``pdist``
    @param t: threshold forwarded to ``fcluster``; no criterion is passed,
        so the library default applies (NOTE(review): for scipy's
        ``fcluster`` that is 'inconsistent' - confirm which one is imported)
    @return: whatever ``fcluster`` returns for the linkage
    """
    shape = images.shape
    flattened = images.reshape(shape[0], shape[1] * shape[2])

    reducer = PCA(n_components=100)
    components = reducer.fit_transform(flattened)

    pairwise = pdist(components, metric=metric)
    linkage = ward(pairwise)
    return fcluster(linkage, t=t)
def hierarchicalClustering(g,k, labels, max_affinity=None):
    '''
    Performs hierarchical clustering using the connections in graph g.
    Edge weights are assumed to be affinity, thus higher weights mean the
    nodes are more similar. Computes a distance matrix from the graph
    affinities (max_affinity - affinity), and clusters using the
    'fastcluster' library implementation of ward's linkage.

    If computing the v-measure fails for any reason, a warning is printed
    and the score is reported as -1; the cluster indexes are still returned.

    @param g: The graph as from generateConnectivityGraph
    @param k: Number of clusters
    @param labels: The ground truth labels used for measuring cluster accuracy
    @param max_affinity: The maximum similarity score that is possible on
    the graph. If None, then the max value of the affinity matrix is used.
    @return: A tuple (clusts, score) where clusts is the ordered list of
    0-based cluster indexes and score is the v-measure between clusts and
    labels (or -1 if it could not be computed).
    '''
    M = generateAffinityMatrix(g, max_affinity=max_affinity)

    if max_affinity is None:
        max_affinity = M.max()

    # Turn affinities into distances: the most similar pair gets distance 0.
    D = max_affinity - M

    # NOTE(review): fastcluster treats a 2-D input as a set of observation
    # vectors, not as pairwise distances. If D is a square distance matrix,
    # confirm this is intended (or pass a condensed matrix instead).
    Z = fc.ward(D)  # linkage structure Z
    clusts = spc.fcluster(Z, k, criterion="maxclust")

    try:
        score = sklearn.metrics.v_measure_score(labels, clusts)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Ordinary failures still degrade to score = -1.
        print("Warning: sklearn module not loaded. V_measure_score not computed.")
        score = -1

    clusts = clusts - 1  # convert from 1-based to 0-based indexes

    return (clusts,score)
def cluster_ward(dataset, distances, roi, regions):
    """Build one labelled image per requested number of clusters.

    ``ward`` is run once on ``distances``; the resulting linkage is then cut
    with ``fcluster(..., 'maxclust')`` for every entry of ``regions``.

    @param dataset: object whose ``masker.get_header()`` supplies the header
        used for each output image
    @param distances: input passed unchanged to ``ward``
    @param roi: object whose ``masker.unmask`` maps the label vector to the
        data handed to ``Nifti1Image``
    @param regions: iterable of cluster counts
    @return: list of ``nifti1.Nifti1Image`` objects, in the order of ``regions``
    """
    print("Clustering: ")
    linkage = ward(distances)

    def _labels_image(n_clusters):
        # Cut the shared linkage and wrap the labels in an image.
        assignments = fcluster(linkage, n_clusters, 'maxclust')

        # TODO: try shortening this
        hdr = dataset.masker.get_header()
        hdr['cal_max'] = assignments.max()
        hdr['cal_min'] = assignments.min()

        volume = roi.masker.unmask(assignments)
        return nifti1.Nifti1Image(volume, None, hdr)

    return [_labels_image(n_reg) for n_reg in regions]
def hierarchicalClusteringDendrogram(g, max_affinity=None,show_dendrogram=False):
    '''
    Builds the Ward's linkage structure for the connections in graph g.

    Follows the same steps as hierarchicalClustering() up to the linkage
    (affinity matrix -> distances -> fc.ward), but returns the linkage
    itself instead of cluster memberships for a given K, and can
    optionally draw the dendrogram.

    @param g: The graph as from generateConnectivityGraph
    @param max_affinity: The maximum similarity score that is possible on
    the graph. If None, then the max value of the affinity matrix is used.
    @param show_dendrogram: If true, plot the dendrogram in a new pylab
    figure and show it before returning.
    @return: Z, the linkage structure
    '''
    affinity = generateAffinityMatrix(g, max_affinity=max_affinity)
    ceiling = affinity.max() if max_affinity is None else max_affinity

    # Distance is the gap between the largest possible affinity and the
    # observed one.
    linkage = fc.ward(ceiling - affinity)

    if not show_dendrogram:
        return linkage

    # pylab is imported only when a plot is actually requested.
    import pylab
    figure = pylab.figure()
    spc.dendrogram(linkage)
    figure.show()

    return linkage
def compute_clustering_fast(distance):
    """Split the input into at most two clusters using fastcluster's Ward linkage.

    The linkage is computed with ``fastcluster.ward`` and cut with scipy's
    ``fcluster`` using ``criterion="maxclust"`` and a limit of 2.

    The previous version took two ``time.clock()`` readings that were never
    used. They are removed: ``time.clock`` no longer exists in Python 3.8+,
    so the calls would raise ``AttributeError`` there.

    @param distance: input passed unchanged to ``fastcluster.ward``
    @return: the flat cluster assignment returned by ``fcluster``
    """
    linkage = fastcluster.ward(distance)
    return scipy.cluster.hierarchy.fcluster(linkage, 2, criterion="maxclust")
# Script: compute scaled correlation distances between the ROI given by
# l_70_mask.nii.gz and the saved reference, store them, then run Ward
# clustering on them and store the linkage as well.
import joblib
from fastcluster import ward
from neurosynth.analysis.cluster import Clusterable
from neurosynth.base.dataset import Dataset
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale

dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')
roi = Clusterable(
    dataset,
    '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

# The reference object is loaded from a previously saved pickle.
saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

# Correlation distance from every ROI row to every reference row, then
# scaled along axis 1.
distances = scale(
    pairwise_distances(roi.data, reference.data, metric='correlation'),
    axis=1)
joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl')

Z = ward(distances)
joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl')
# Script: read the table to cluster from CSV and run fastcluster's Ward
# linkage directly on the resulting DataFrame.
import fastcluster as fc
import pandas as pd

df = pd.read_csv("~/downloads/to_cluster.csv")

# The linkage result is kept in ``out`` for later use.
out = fc.ward(df)