def execute(self, dataset):
    """Cluster ``dataset[0]`` with SpectralClustering and show a 2-D scatter plot.

    All clustering hyper-parameters are read from the instance's configured
    attributes (``n_clusters``, ``eigen_solver``, ...).  The input matrix is
    standardised first; only the first two feature columns are plotted.

    Parameters
    ----------
    dataset : sequence
        ``dataset[0]`` is the (n_samples, n_features) matrix to cluster.
    """
    X = dataset[0]
    # Zero-mean / unit-variance scaling; spectral affinities are scale sensitive.
    X = StandardScaler().fit_transform(X)
    clf = SpectralClustering(n_clusters=self.n_clusters,
                             eigen_solver=self.eigen_solver,
                             random_state=self.random_state,
                             n_init=self.n_init,
                             gamma=self.gamma,
                             affinity=self.affinity,
                             n_neighbors=self.n_neighbors,
                             eigen_tol=self.eigen_tol,
                             assign_labels=self.assign_labels,
                             degree=self.degree,
                             coef0=self.coef0,
                             n_jobs=self.n_jobs)
    y = clf.fit_predict(X)
    # One distinct colour per discovered cluster, sampled evenly from gist_ncar.
    n_labels = len(set(y))
    colors = ListedColormap(
        [plt.get_cmap("gist_ncar")(each) for each in np.linspace(0, 1, n_labels)])
    X0, X1 = X[:, 0], X[:, 1]
    plt.clf()
    # Reuse X0/X1 (the original re-sliced X[:, 0] / X[:, 1] a second time here).
    plt.scatter(X0, X1, c=y, cmap=colors, s=20, edgecolors='k')
    plt.xlim(X0.min() - 0.5, X0.max() + 0.5)
    plt.ylim(X1.min() - 0.5, X1.max() + 0.5)
    plt.title('Spectral Clustering')
    plt.show()
def get_feature_clusters(df, label_column, idx2colname, n_clusters=13):
    """Group the columns of *df* into clusters of mutually correlated features.

    Spectral clustering is run on the absolute column-correlation matrix,
    which is treated as a precomputed affinity (symmetric, non-negative).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature matrix; *label_column* is dropped first if present.
    label_column : str
        Name of the target column to exclude from the correlation.
    idx2colname : mapping of int -> str
        Maps a column index of *df* to its column name.
    n_clusters : int, optional
        Number of feature clusters to produce (default 13).

    Returns
    -------
    (list of list of str, list of numpy.ndarray)
        Per-cluster column names and the matching column-index arrays.
    """
    if label_column in df.columns:
        df = df.drop([label_column], axis=1)
    clusterer = SpectralClustering(n_clusters=n_clusters,
                                   affinity='precomputed',
                                   random_state=346345)
    cluster_argindices = clusterer.fit_predict(np.abs(df.corr()))
    cluster_indices = [np.where(cluster_argindices == cluster_idx)[0]
                       for cluster_idx in range(n_clusters)]
    # Materialise as a list of lists: the original returned a lazy `map`
    # object that could only be iterated once.
    name_clusters = [[idx2colname[i] for i in indices] for indices in cluster_indices]
    return name_clusters, cluster_indices
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    """Build a weighted NetworkX graph from mutual information and contacts.

    An edge (resid_i, resid_j) is kept when the mutual information exceeds
    *mi_threshold* AND the pair is in contact for more than *time_treshold*
    of the frames.  Current-flow betweenness and a spectral-cluster label are
    attached to each node; the result is stored in ``self.graph`` (and the
    thresholded MI matrix in ``self.graph_array``).

    Parameters
    ----------
    datacontacts : object
        Contact metric with ``dat`` (per-trajectory arrays) and
        ``description.atomIndexes`` — project type, see caller.
    mi_threshold : float
        Minimum mutual information for an edge.
    time_treshold : float, optional
        Minimum fraction of frames a contact must persist (default 0.6).

    Raises
    ------
    Exception
        If the protein length does not match the MI data.
    """
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')
    # Stack per-trajectory contact data into one (frames x pairs) array.
    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        # Number of frames in which pair i is in contact.
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter

    # Keep MI only where it is significant AND the contact persists long enough.
    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]

    # Collect strict-upper-triangle non-zero entries as (source, target, weight).
    intermed = []
    for source in range(self.graph_array.shape[0]):
        for target in range(source + 1, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0:
                intermed.append([int(self.resids[source]), int(self.resids[target]),
                                 float(self.graph_array[source, target])])

    import pandas as pd
    import networkx as nx
    # sklearn.cluster.spectral was a private module removed in scikit-learn 0.24;
    # the public path is sklearn.cluster.
    from sklearn.cluster import SpectralClustering

    # Do not rebind `pd` (the original shadowed the pandas module here).
    edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    edges[['source', 'target']] = edges[['source', 'target']].astype(int)
    edges['weight'] = edges['weight'].astype(float)
    G = nx.from_pandas_edgelist(edges, 'source', 'target', 'weight')

    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Residues that take part in at least one edge.  The original tested
    # np.any(df.loc[...].index), which is falsy when the only matching row
    # index is 0 and silently dropped that residue's Segment attribute.
    present = set(edges['source']).union(edges['target'])
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if key in present}
    nx.set_node_attributes(G, seg_res_dict, 'Segment')

    ## keep only the largest connected component for the flow computation
    if not nx.is_connected(G):
        # nx.connected_component_subgraphs was removed in NetworkX 2.4.
        G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, flow_cent, 'flowcent')

    # Spectral clustering of the full thresholded-MI matrix; labels are only
    # attached to residues that survived as graph nodes.
    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array).astype(float)
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, spectral_dict, 'spectral')
    self.graph = G
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    # Build self.graph: residues are nodes; an edge (i, j) carries the mutual
    # information when it exceeds mi_threshold AND the pair is in contact for
    # more than time_treshold of the frames.  Flow betweenness and a spectral
    # cluster label are attached per node.
    # NOTE(review): this variant targets the NetworkX 1.x API
    # (from_pandas_dataframe, set_node_attributes(G, name, values)) and the
    # old sklearn.cluster.spectral import path; it will not run on
    # NetworkX >= 2.0 or scikit-learn >= 0.24 -- confirm the pinned versions.
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')
    # Stack per-trajectory contact data into one (frames x pairs) array.
    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        # Number of frames in which pair i is in contact.
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter
    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    # Keep MI only where significant and the contact persists long enough.
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]
    intermed = []
    # Collect strict-upper-triangle non-zero entries as (source, target, weight).
    for source in range(self.graph_array.shape[0]):
        for target in range(source, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0 and target > source:
                intermed.append(
                    [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
    import pandas as pd
    import networkx as nx
    from sklearn.cluster.spectral import SpectralClustering
    # NOTE(review): rebinding `pd` to the DataFrame shadows the pandas module
    # for the rest of this function.
    pd = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    # NOTE(review): type('int', (int,), {}) creates an anonymous int subclass;
    # the effect is the same as astype(int)/astype(float) here.
    pd[['source', 'target']] = pd[['source', 'target']].astype(type('int', (int,), {}))
    pd['weight'] = pd['weight'].astype(type('float', (float,), {}))
    G = nx.from_pandas_dataframe(pd, 'source', 'target', ['weight'])
    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Attach the segment id to every residue that appears in at least one edge.
    # NOTE(review): np.any over the matching row *index values* is falsy when
    # the only match is row 0, so that residue is skipped -- likely a bug.
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if np.any(pd.loc[(pd['source'] == key)].index) or np.any(pd.loc[(pd['target'] == key)].index)}
    nx.set_node_attributes(G, 'Segment', seg_res_dict)
    ## set
    if not nx.is_connected(G):
        # Keep only the largest connected component (NetworkX 1.x helper).
        G = max(nx.connected_component_subgraphs(G), key=len)
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, 'flowcent', flow_cent)
    # Spectral clustering of the full thresholded-MI matrix; labels are only
    # attached to residues that survived as graph nodes.
    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array)
    model = model.astype(type('float', (float,), {}))
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, 'spectral', spectral_dict)
    self.graph = G
def main():
    """Spectral clustering of a seed-region connectivity matrix.

    Python 2 script.  Loads a voxel-by-target connectivity profile from an
    HDF5 file, mixes a correlation similarity with a spatial-proximity term,
    clusters the voxels spectrally, and writes the parcellation to a NIfTI
    file.  Relies on module-level names (Dataset, open_conn_mat, create_mask,
    mask_feature, map2nifti, np, ds, plt, pl, cm, time) -- presumably
    PyMVPA-style helpers defined elsewhere in this file; confirm before reuse.
    """
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    print hfilename
    #load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print conn.shape,conn.a
    #remove some features
    mask = create_mask(conn.samples,0.5,1)
    # print mask,mask.shape
    conn_m = mask_feature(conn.samples,mask)
    # print conn_m
    # NOTE(review): `map` shadows the builtin; rows become voxels after the
    # transpose.
    map = conn_m.T
    print "map:"
    print map.shape,map.max(),map.min()
    # Voxel coordinates come from the dataset's feature attributes.
    voxel = np.array(conn.fa.values())
    print voxel[0]
    v = voxel[0]
    # Pairwise Euclidean distance between voxel coordinates.
    spacedist = ds.cdist(v,v,'euclidean')
    print spacedist
    """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """
    # Mixed similarity: 0.1 * |correlation| + 0.9 * spatial proximity.
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    corr = 0.1*corr + 0.9/(spacedist+1)
    print "Elaspsed time: ", time.time() - st
    print corr.shape,corr
    plt.imshow(corr,interpolation='nearest',cmap=cm.jet)
    cb = plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()
    cnum = 3
    near = 100
    # NOTE(review): positional args to SpectralClustering; in the era's
    # signature these map to (n_clusters, eigen_solver, random_state, n_init,
    # gamma, affinity, n_neighbors, eigen_tol, assign_labels).  The trailing
    # True for assign_labels looks suspect -- confirm against the pinned
    # scikit-learn version.
    sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True)
    #sc.fit(map)
    sc.fit_predict(corr)
    ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) '''
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_+1
    # print sc.affinity_matrix_
    #print list(sc.labels_)
    print "Elaspsed time: ", time.time() - st
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print ".....The end........"
def spectral_seg(hfilename,outf):
    """Spectral segmentation of a connectivity profile into parcels.

    Python 2 function.  Loads a voxel-by-target connectivity matrix from
    *hfilename*, smooths each voxel's profile over its spatial neighbors,
    mixes correlation and spatial-proximity similarities, runs spectral
    clustering and saves the 1-based parcel labels to *outf* (NIfTI).

    Relies on module-level names (Dataset, h5load, create_mask,
    get_neighbors, map2nifti, np, ds, time) -- presumably PyMVPA-style
    helpers defined elsewhere in this file; confirm before reuse.

    Returns True on completion.
    """
    tmpset = Dataset([])
    #pdb.set_trace()
    print "hdf name:",hfilename
    st = time.time()
    ###1.load connectivity profile of seed mask voxels
    conn = h5load(hfilename)
    tmpset.a = conn.a
    print "connection matrix shape:"
    print conn.shape
    ###2.features select
    mask = create_mask(conn.samples,5)
    conn_m = conn.samples[mask]
    # NOTE(review): `map` shadows the builtin; rows are voxels after transpose.
    map = conn_m.T
    print "masked conn matrix:"
    print map.shape,map.max(),map.min()
    ###3.average the connection profile.
    temp = np.zeros(map.shape)
    voxel = np.array(conn.fa.values())
    v = voxel[0]
    v = v.tolist()
    # assumes coordinates live in a 256^3 volume -- TODO confirm.
    shape = [256,256,256]
    i = 0
    for coor in v:
        mean_f = map[i]
        #print mean_f.shape
        #plt.plot(mean_f)
        #plt.show()
        neigh =get_neighbors(coor,2,shape)
        #print "neigh:",neigh
        # Running average of the profile over in-mask neighbors.
        # NOTE(review): `n in v` / `v.index(n)` are linear scans per neighbor,
        # so this loop is roughly O(voxels^2).
        count = 1
        for n in neigh:
            if n in v:
                mean_f = (mean_f*count + map[v.index(n)])/(count+1)
                count+=1
        temp[i] = mean_f
        i+=1
    #sys.exit(0)
    map = temp
    print "average connection matrix"
    ###4.spacial distance
    spacedist = ds.cdist(v,v,'euclidean')
    #print spacedist
    ###5.correlation matrix
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    ###6.mix similariry matrix.
    # Mixed similarity: 0.7 * |correlation| + 0.3 * spatial proximity.
    corr = 0.7*corr + 0.3/(spacedist+1)
    #plt.imshow(corr,interpolation='nearest',cmap=cm.jet)
    #cb = plt.colorbar()
    #pl.xticks(())
    #pl.yticks(())
    #pl.show()
    print "mix up the corr and spacial matrix"
    #sys.exit(0)
    ###7.spectral segmentation
    print "do segmentation"
    cnum = 3
    near = 100
    # NOTE(review): positional SpectralClustering args; the trailing True
    # lands on assign_labels in the era's signature -- confirm against the
    # pinned scikit-learn version.
    sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True)
    sc.fit_predict(corr)
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_+1
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    print "Elapsed time: ", time.time() - st
    ###8.save the segmentation result.
    print "save the result to xxx_parcel.nii.gz"
    result = map2nifti(tmpset)
    result.to_filename(outf)
    print ".....Segment end........"
    return True
def main():
    """Spectral clustering of a seed-region connectivity matrix.

    Python 2 script (PEP8-spaced duplicate of the earlier main()).  Loads a
    voxel-by-target connectivity profile from HDF5, mixes a correlation
    similarity with a spatial-proximity term, clusters the voxels spectrally,
    and writes the parcellation to a NIfTI file.  Relies on module-level
    names (Dataset, open_conn_mat, create_mask, mask_feature, map2nifti, np,
    ds, plt, pl, cm, time) -- presumably PyMVPA-style helpers defined
    elsewhere in this file; confirm before reuse.
    """
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    print hfilename
    #load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print conn.shape, conn.a
    #remove some features
    mask = create_mask(conn.samples, 0.5, 1)
    # print mask,mask.shape
    conn_m = mask_feature(conn.samples, mask)
    # print conn_m
    # NOTE(review): `map` shadows the builtin; rows become voxels after the
    # transpose.
    map = conn_m.T
    print "map:"
    print map.shape, map.max(), map.min()
    # Voxel coordinates come from the dataset's feature attributes.
    voxel = np.array(conn.fa.values())
    print voxel[0]
    v = voxel[0]
    # Pairwise Euclidean distance between voxel coordinates.
    spacedist = ds.cdist(v, v, 'euclidean')
    print spacedist
    """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """
    # Mixed similarity: 0.1 * |correlation| + 0.9 * spatial proximity.
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    corr = 0.1 * corr + 0.9 / (spacedist + 1)
    print "Elaspsed time: ", time.time() - st
    print corr.shape, corr
    plt.imshow(corr, interpolation='nearest', cmap=cm.jet)
    cb = plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()
    cnum = 3
    near = 100
    # NOTE(review): positional SpectralClustering args; in the era's signature
    # these map to (n_clusters, eigen_solver, random_state, n_init, gamma,
    # affinity, n_neighbors, eigen_tol, assign_labels).  The trailing True for
    # assign_labels looks suspect -- confirm against the pinned scikit-learn
    # version.
    sc = SpectralClustering(cnum, 'arpack', None, 100, 1, 'precomputed', near, None, True)
    #sc.fit(map)
    sc.fit_predict(corr)
    ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) '''
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_ + 1
    # print sc.affinity_matrix_
    #print list(sc.labels_)
    print "Elaspsed time: ", time.time() - st
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print ".....The end........"