Example #1
0
    def execute(self, dataset):
        """Standardize, cluster and plot the first element of ``dataset``.

        ``dataset[0]`` is rescaled with ``StandardScaler``, clustered by a
        ``SpectralClustering`` estimator configured from this object's
        attributes, and its first two columns are drawn as a scatter plot
        coloured by cluster label.

        :param dataset: indexable whose first item is the feature matrix.
        :return: None; the figure is displayed through ``plt.show()``.
        """
        scaled = StandardScaler().fit_transform(dataset[0])

        estimator = SpectralClustering(
            n_clusters=self.n_clusters,
            eigen_solver=self.eigen_solver,
            random_state=self.random_state,
            n_init=self.n_init,
            gamma=self.gamma,
            affinity=self.affinity,
            n_neighbors=self.n_neighbors,
            eigen_tol=self.eigen_tol,
            assign_labels=self.assign_labels,
            degree=self.degree,
            coef0=self.coef0,
            n_jobs=self.n_jobs,
        )
        assignments = estimator.fit_predict(scaled)

        # One colour per distinct label, sampled evenly from "gist_ncar".
        base_cmap = plt.get_cmap(name="gist_ncar")
        n_labels = len(set(assignments))
        palette = ListedColormap(
            [base_cmap(position) for position in np.linspace(0, 1, n_labels)])

        first_axis = scaled[:, 0]
        second_axis = scaled[:, 1]

        plt.clf()
        plt.scatter(first_axis, second_axis, c=assignments, cmap=palette,
                    s=20, edgecolors='k')
        # Leave a half-unit margin around the data on both axes.
        plt.xlim(first_axis.min() - 0.5, first_axis.max() + 0.5)
        plt.ylim(second_axis.min() - 0.5, second_axis.max() + 0.5)
        plt.title('Spectral Clustering')
        plt.show()
Example #2
0
 def fit_predict_close(self, X, raw_input_=False):
     """
     Cluster the samples with the closed-form (ridge) self-expressive model.

     Every sample is regressed onto all remaining samples with an L2
     penalty ``self.lambda_coef``. The coefficient vectors, each scaled by
     its largest absolute entry, form the columns of a matrix ``C`` whose
     symmetrised absolute value is stored in ``self.affinity_matrix`` and
     passed to ``SpectralClustering`` as a precomputed affinity.

     :param X: 2-D array with one sample per row.
     :param raw_input_: when exactly ``True``, ``X`` itself is used as the
         representation; otherwise ``X`` is first mapped through
         ``NRP_ELM(self.n_hidden, sparse=False)``.
     :return: the ``labels_`` array of the fitted ``SpectralClustering``.
     """
     n_sample = X.shape[0]
     if raw_input_ is True:
         H = X
     else:
         H = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)
     C = np.zeros((n_sample, n_sample))
     # The ridge term does not depend on the sample, so build it once.
     ridge = self.lambda_coef * np.eye(n_sample - 1)
     for i in range(n_sample):
         y_i = H[i]
         others = np.delete(H, i, axis=0)  # every sample except the i-th
         gram = np.dot(others, others.transpose()) + ridge
         # Solve the regularised normal equations directly instead of
         # forming an explicit inverse.
         w = np.asarray(np.linalg.solve(gram, np.dot(others, y_i))).flatten()
         # Scale the column by its largest absolute entry. An all-zero
         # vector is left untouched so that 0/0 does not put NaN into C.
         scale = np.max(np.abs(w))
         coef = w / scale if scale > 0 else w
         # Column i skips row i: the self-coefficient stays zero.
         C[:i, i] = coef[:i]
         C[i + 1:, i] = coef[i:]
     # compute affinity matrix
     L = 0.5 * (np.abs(C) + np.abs(C.T))  # affinity graph
     self.affinity_matrix = L
     # spectral clustering
     sc = SpectralClustering(n_clusters=self.n_clusters,
                             affinity='precomputed')
     sc.fit(self.affinity_matrix)
     return sc.labels_
Example #3
0
 def fit_predict_cvx(self, X):
     """
     Cluster the samples with an L1-regularised self-expressive model.

     For every sample a convex problem is solved that expresses it through
     the remaining samples: half the squared residual plus
     ``0.5 * self.lambda_coef`` times the L1 norm of the coefficients. The
     coefficient vectors, each scaled by its largest absolute entry, form
     the columns of ``C``; its symmetrised absolute value is stored in
     ``self.affinity_matrix`` and clustered with ``SpectralClustering``.

     :param X: 2-D array with one sample per row; used directly as the
         representation (no hidden-layer mapping is applied here).
     :return: the ``labels_`` array of the fitted ``SpectralClustering``.
     """
     n_sample = X.shape[0]
     H = X
     C = np.zeros((n_sample, n_sample))
     # solve sparse self-expressive representation
     for i in range(n_sample):
         y_i = H[i]
         H_i = np.delete(H, i, axis=0)
         w = cvx.Variable(n_sample - 1)
         objective = cvx.Minimize(
             0.5 * cvx.sum_squares(H_i.transpose() * w - y_i) +
             0.5 * self.lambda_coef * cvx.norm(w, 1))
         cvx.Problem(objective).solve()
         ww = np.asarray(w.value).flatten()
         # Scale the column by its largest absolute entry. An all-zero
         # vector is left untouched so that 0/0 does not put NaN into C.
         scale = np.max(np.abs(ww))
         coef = ww / scale if scale > 0 else ww
         # Column i skips row i: the self-coefficient stays zero.
         C[:i, i] = coef[:i]
         C[i + 1:, i] = coef[i:]
     # compute affinity matrix
     L = 0.5 * (np.abs(C) + np.abs(C.T))  # affinity graph
     self.affinity_matrix = L
     # spectral clustering
     sc = SpectralClustering(n_clusters=self.n_clusters,
                             affinity='precomputed')
     sc.fit(self.affinity_matrix)
     return sc.labels_
Example #4
0
 def fit_predict_omp(self, X, y=None):
     """
     Cluster the samples with an OMP-based self-expressive model.

     ``X`` is mapped through ``NRP_ELM(self.n_hidden, sparse=False)``; each
     mapped sample is then regressed onto the remaining ones with
     ``OrthogonalMatchingPursuit``. The coefficient vectors, each scaled by
     its largest absolute entry, form the columns of ``C``; its symmetrised
     absolute value is stored in ``self.affinity_matrix`` and clustered with
     ``SpectralClustering``.

     :param X: 2-D array with one sample per row.
     :param y: unused; accepted for interface compatibility.
     :return: the ``labels_`` array of the fitted ``SpectralClustering``.
     """
     n_sample = X.shape[0]
     H = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)
     C = np.zeros((n_sample, n_sample))
     # solve sparse self-expressive representation
     for i in range(n_sample):
         y_i = H[i]
         H_i = np.delete(H, i, axis=0)
         # NOTE(review): both n_nonzero_coefs and a very large tol are
         # passed; confirm which stopping rule is actually intended.
         omp = OrthogonalMatchingPursuit(n_nonzero_coefs=int(n_sample *
                                                             0.5),
                                         tol=1e20)
         omp.fit(H_i.transpose(), y_i)
         # Scale the column by its largest absolute entry. An all-zero
         # vector is left untouched so that 0/0 does not put NaN into C.
         scale = np.max(np.abs(omp.coef_))
         coef = omp.coef_ / scale if scale > 0 else omp.coef_
         # Column i skips row i: the self-coefficient stays zero.
         C[:i, i] = coef[:i]
         C[i + 1:, i] = coef[i:]
     # compute affinity matrix
     L = 0.5 * (np.abs(C) + np.abs(C.T))  # affinity graph
     self.affinity_matrix = L
     # spectral clustering
     sc = SpectralClustering(n_clusters=self.n_clusters,
                             affinity='precomputed')
     sc.fit(self.affinity_matrix)
     return sc.labels_
Example #5
0
def get_feature_clusters(df, label_column, idx2colname, n_clusters=13):
    """Group the columns of ``df`` by spectral clustering of their correlations.

    The absolute correlation matrix of ``df`` (without ``label_column``, when
    that column is present) is used as a precomputed affinity.

    :param df: DataFrame whose columns are clustered.
    :param label_column: column name dropped before computing correlations.
    :param idx2colname: object indexed by column position that yields the
        column name.
    :param n_clusters: number of clusters requested.
    :return: ``(name_clusters, cluster_indices)``; ``cluster_indices`` is a
        list with one index array per cluster and ``name_clusters`` is a list
        with the matching lists of column names.
    """
    if label_column in df.columns:
        df = df.drop([label_column], axis=1)
    clusterer = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=346345)
    assignments = clusterer.fit_predict(np.abs(df.corr()))
    cluster_indices = [np.where(assignments == cluster_idx)[0] for cluster_idx in range(n_clusters)]
    # Build a real list: a bare map() is a one-shot iterator on Python 3,
    # which would be empty on the caller's second pass.
    name_clusters = [[idx2colname[idx] for idx in members] for members in cluster_indices]
    return name_clusters, cluster_indices
def computeIntersectionSC_pheno(medians, medGENES, medSI, delta_l, k_l, phenotypic_labels):
    """Score spectral clusterings over a grid of kernel widths and cluster counts.

    For every ``delta`` the affinity ``exp(-delta * medians**2)`` is built,
    and for every ``k`` a ``SpectralClustering`` with that precomputed
    affinity is fitted; the resulting labels are scored with
    ``intersection(labels, phenotypic_labels, medSI)``.

    :param medians: array turned into the affinity (squared element-wise).
    :param medGENES: unused here.
    :param medSI: forwarded to ``intersection``.
    :param delta_l: sequence of ``delta`` values (rows of the result).
    :param k_l: sequence of cluster counts (columns of the result).
    :param phenotypic_labels: forwarded to ``intersection``.
    :return: float array of shape ``(len(delta_l), len(k_l))``.
    """
    result = np.empty(shape=(len(delta_l), len(k_l)), dtype=float)

    for j, delta in enumerate(delta_l):
        affinity = np.exp(-delta * medians**2)

        for i, k in enumerate(k_l):
            # Single-argument call form: same output as the old print
            # statement on Python 2, and valid syntax on Python 3.
            print('---- %s %s' % (delta, k))
            model = SpectralClustering(affinity='precomputed', n_clusters=k)
            model.fit(affinity)

            result[j, i] = intersection(model.labels_, phenotypic_labels, medSI)

    return result
Example #7
0
    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """Build the weighted residue graph and store it in ``self.graph``.

        A pair of residues is kept when its entry in ``self.mi_matrix``
        exceeds ``mi_threshold`` and its contact column is non-zero in more
        than ``time_treshold`` times the number of rows of the concatenated
        contact data. The kept mutual-information values are written to
        ``self.graph_array``; its upper triangle becomes the edge list of a
        networkx graph. Nodes receive the attributes ``Segment``,
        ``flowcent`` and ``spectral``; only the largest connected component
        is kept.

        :param datacontacts: object exposing ``dat`` (arrays to concatenate)
            and ``description.atomIndexes`` (atom index pair per column).
        :param mi_threshold: lower bound (exclusive) on mutual information.
        :param time_treshold: fraction of rows a contact must be present in.
        :raises Exception: when the ``'name CA'`` selection and
            ``self.resids`` differ in length.
        """
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')
        contactcat = np.concatenate(datacontacts.dat)
        n_res = len(self.resids)
        contacts_matrix = np.zeros([n_res, n_res])
        atom_pairs = datacontacts.description.atomIndexes
        for i in range(contactcat.shape[1]):
            resid1 = self.residmap[self.mol.resid[atom_pairs[i][0]]]
            resid2 = self.residmap[self.mol.resid[atom_pairs[i][1]]]
            # Number of rows in which contact i is present.
            contacts_matrix[resid1, resid2] = np.count_nonzero(contactcat[:, i])

        self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Edge list from the strict upper triangle of graph_array.
        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source + 1, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0:
                    intermed.append(
                        [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
        import pandas as pd
        import networkx as nx
        # Public import path; the sklearn.cluster.spectral module is gone
        # from current scikit-learn releases.
        from sklearn.cluster import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        # NOTE(review): the ad-hoc int/float subclasses are kept as-is;
        # presumably they force Python-native values -- confirm before
        # simplifying.
        edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
        edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
        G = nx.from_pandas_edgelist(edges, 'source', 'target', 'weight')
        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        # Test whether any row matches. Testing the truthiness of the
        # matching row labels wrongly skipped residues found only in row 0.
        seg_res_dict = {key: value for (key, value) in zip(self.resids, segids) if
                        (edges['source'] == key).any() or (edges['target'] == key).any()}
        nx.set_node_attributes(G, seg_res_dict, 'Segment')
        ## keep only the largest connected component
        if not nx.is_connected(G):
            # connected_component_subgraphs was removed from networkx;
            # build the subgraph from the largest node set instead.
            G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, flow_cent, 'flowcent')
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float,), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, spectral_dict, 'spectral')
        self.graph = G
Example #8
0
    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """Build the weighted residue graph and store it in ``self.graph``.

        A pair of residues is kept when its entry in ``self.mi_matrix``
        exceeds ``mi_threshold`` and its contact column is non-zero in more
        than ``time_treshold`` times the number of rows of the concatenated
        contact data. The kept mutual-information values are written to
        ``self.graph_array``; its upper triangle becomes the edge list of a
        networkx graph. Nodes receive the attributes ``Segment``,
        ``flowcent`` and ``spectral``; only the largest connected component
        is kept.

        This variant uses the networkx 1.x call conventions
        (``from_pandas_dataframe``, ``set_node_attributes(G, name, values)``).

        :param datacontacts: object exposing ``dat`` (arrays to concatenate)
            and ``description.atomIndexes`` (atom index pair per column).
        :param mi_threshold: lower bound (exclusive) on mutual information.
        :param time_treshold: fraction of rows a contact must be present in.
        :raises Exception: when the ``'name CA'`` selection and
            ``self.resids`` differ in length.
        """
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')
        contactcat = np.concatenate(datacontacts.dat)
        n_res = len(self.resids)
        contacts_matrix = np.zeros([n_res, n_res])
        atom_pairs = datacontacts.description.atomIndexes
        for i in range(contactcat.shape[1]):
            resid1 = self.residmap[self.mol.resid[atom_pairs[i][0]]]
            resid2 = self.residmap[self.mol.resid[atom_pairs[i][1]]]
            # Number of rows in which contact i is present.
            contacts_matrix[resid1, resid2] = np.count_nonzero(contactcat[:, i])

        self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Edge list from the strict upper triangle of graph_array.
        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source + 1, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0:
                    intermed.append(
                        [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
        import pandas as pd
        import networkx as nx
        # Public import path; the sklearn.cluster.spectral module is gone
        # from current scikit-learn releases.
        from sklearn.cluster import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        # NOTE(review): the ad-hoc int/float subclasses are kept as-is;
        # presumably they force Python-native values -- confirm before
        # simplifying.
        edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
        edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
        G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])
        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        # Test whether any row matches. Testing the truthiness of the
        # matching row labels wrongly skipped residues found only in row 0.
        seg_res_dict = {key: value for (key, value) in zip(self.resids, segids) if
                        (edges['source'] == key).any() or (edges['target'] == key).any()}
        nx.set_node_attributes(G, 'Segment', seg_res_dict)
        ## keep only the largest connected component
        if not nx.is_connected(G):
            G = max(nx.connected_component_subgraphs(G), key=len)
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, 'flowcent', flow_cent)
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float,), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, 'spectral', spectral_dict)
        self.graph = G
Example #9
0
    def predict(self, X):
        """
        Select a band subset by spectral clustering of a band affinity.

        The affinity is the squared, norm-normalised inner product between
        the columns of the coefficient matrix derived from the regularised
        Gram matrix ``X^T X + self.coef_ * I``.

        :param X: shape [n_row*n_clm, n_band]
        :return: selected band subset
        """
        # Regularised Gram matrix, shared by both inverses below.
        gram = np.dot(X.transpose(), X) + self.coef_ * np.eye(X.shape[1])
        gram_inverse = np.linalg.inv(gram)
        diagonal_inverse = np.linalg.inv(np.diag(np.diag(gram)))
        coefficient_mat = -1 * np.dot(gram_inverse, diagonal_inverse)

        # Column norms as a row vector; their outer product normalises
        # the column inner products.
        column_norms = np.linalg.norm(coefficient_mat, axis=0).reshape(1, -1)
        inner_products = np.dot(coefficient_mat.transpose(), coefficient_mat)
        norm_products = np.dot(column_norms.transpose(), column_norms)
        affinity = (inner_products / norm_products)**2

        sc = SpectralClustering(n_clusters=self.n_band, affinity='precomputed')
        sc.fit(affinity)
        return self.__get_band(sc.labels_, X)
    def run(self, graph: nx.Graph, k: int):
        """Partition ``graph`` into ``k`` clusters with spectral clustering.

        The adjacency matrix of ``graph`` is used as a precomputed affinity.
        The random seed is read from the ``random_state`` environment
        variable and ``self.offset + k`` components are requested.

        :param graph: graph to partition.
        :param k: number of clusters.
        :return: a new ``nx.Graph`` with the same nodes and edges, where
            every node carries its cluster in the ``partition`` attribute.
        """
        clusterer = SpectralClustering(
            n_clusters=k,
            eigen_solver="amg",
            random_state=int(os.environ["random_state"]),
            n_components=self.offset + k,
            affinity="precomputed",
            n_jobs=-1)
        adjacency = nx.adjacency_matrix(graph)
        pred_k = clusterer.fit_predict(adjacency)
        print("Done, partitioning now...\n")

        partitioned = nx.Graph()
        # Labels follow the iteration order of graph.nodes.
        partitioned.add_nodes_from(
            (node, {"partition": pred_k[index]})
            for index, node in enumerate(graph.nodes))
        partitioned.add_edges_from(graph.edges)

        return partitioned
Example #11
0
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
			'RobustScaler':RobustScaler(),
			'SGDClassifier':SGDClassifier(),
			'SGDRegressor':SGDRegressor(),
			'SVC':SVC(),
			'SVR':SVR(),
			'SelectFdr':SelectFdr(),
			'SelectFpr':SelectFpr(),
			'SelectFwe':SelectFwe(),
			'SelectKBest':SelectKBest(),
			'SelectPercentile':SelectPercentile(),
			'ShrunkCovariance':ShrunkCovariance(),
			'SkewedChi2Sampler':SkewedChi2Sampler(),
			'SparsePCA':SparsePCA(),
			'SparseRandomProjection':SparseRandomProjection(),
			'SpectralBiclustering':SpectralBiclustering(),
			'SpectralClustering':SpectralClustering(),
			'SpectralCoclustering':SpectralCoclustering(),
			'SpectralEmbedding':SpectralEmbedding(),
			'StandardScaler':StandardScaler(),
			'TSNE':TSNE(),
			'TheilSenRegressor':TheilSenRegressor(),
			'VBGMM':VBGMM(),
			'VarianceThreshold':VarianceThreshold(),}

    
Example #12
0
def main():
    '''
    Parcellate the seed-mask voxels of 'fdt_matrix2.T.hdf5' by spectral clustering.

    The connectivity profiles are loaded and feature-masked; their absolute
    correlation matrix is blended with an inverse spatial-distance term
    (weights 0.1 and 0.9), displayed, and used as a precomputed affinity for
    a 3-cluster SpectralClustering. Labels (shifted to start at 1) are
    written to "fg_parcel_S0006.nii.gz".
    '''
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    # All prints use the single-argument call form so the output is the
    # same on Python 2 and the syntax is valid on Python 3.
    print(hfilename)
    # load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print("%s %s" % (conn.shape, conn.a))
    # remove some features
    mask = create_mask(conn.samples, 0.5, 1)
    conn_m = mask_feature(conn.samples, mask)
    # One row per voxel; named to avoid shadowing the builtin map().
    profiles = conn_m.T
    print("map:")
    print("%s %s %s" % (profiles.shape, profiles.max(), profiles.min()))

    voxel = np.array(conn.fa.values())
    print(voxel[0])
    v = voxel[0]
    spacedist = ds.cdist(v, v, 'euclidean')
    print(spacedist)

    # Disabled alternative: build the similarity matrix with
    # create_similarity_mat(map, conn.fa, 0.1, 2) instead of correlations.

    corr = np.corrcoef(profiles)
    corr = np.abs(corr)
    # Blend functional similarity with spatial proximity.
    corr = 0.1*corr + 0.9/(spacedist+1)

    print("Elaspsed time:  %s" % (time.time() - st,))
    print("%s %s" % (corr.shape, corr))
    plt.imshow(corr, interpolation='nearest', cmap=cm.jet)
    plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()

    cnum = 3
    near = 100
    # Keyword arguments: current scikit-learn accepts only n_clusters
    # positionally.
    # NOTE(review): the original passed two extra trailing positionals
    # (None, True), presumably the long-removed `k` / `precomputed`
    # parameters; affinity='precomputed' already covers that -- confirm.
    sc = SpectralClustering(n_clusters=cnum, eigen_solver='arpack',
                            random_state=None, n_init=100, gamma=1,
                            affinity='precomputed', n_neighbors=near)
    sc.fit_predict(corr)
    # Disabled alternative: affinity='nearest_neighbors' fitted on the
    # profiles themselves (sc.fit(map)).
    tmpset.samples = sc.labels_+1
    print("Elaspsed time:  %s" % (time.time() - st,))
    print("Number of voxels:  %s" % (sc.labels_.size,))
    print("Number  of clusters:  %s" % (np.unique(sc.labels_).size,))

    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print(".....The end........")
Example #13
0
def spectral_seg(hfilename,outf):
    '''
    Segment the voxels stored in an HDF5 connectivity file by spectral clustering.

    Steps: load the connectivity data, mask the features, average every
    voxel's profile with the profiles of its neighbouring voxels, blend the
    absolute profile correlation with an inverse spatial-distance term
    (weights 0.7 and 0.3), run a 3-cluster SpectralClustering on that
    precomputed affinity and write the labels (shifted to start at 1) to
    ``outf``.

    :param hfilename: path handed to ``h5load``.
    :param outf: output file name for the label image.
    :return: True
    '''
    tmpset = Dataset([])
    # All prints use the single-argument call form so the output is the
    # same on Python 2 and the syntax is valid on Python 3.
    print("hdf name: %s" % (hfilename,))
    st = time.time()
    ###1.load connectivity profile of seed mask voxels
    conn = h5load(hfilename)
    tmpset.a = conn.a
    print("connection matrix shape:")
    print(conn.shape)
    ###2.features select
    mask = create_mask(conn.samples, 5)
    conn_m = conn.samples[mask]
    # One row per voxel; named to avoid shadowing the builtin map().
    profiles = conn_m.T
    print("masked conn matrix:")
    print("%s %s %s" % (profiles.shape, profiles.max(), profiles.min()))

    ###3.average the connection profile.
    temp = np.zeros(profiles.shape)
    voxel = np.array(conn.fa.values())
    v = voxel[0]
    v = v.tolist()

    shape = [256,256,256]

    for i, coor in enumerate(v):
        mean_f = profiles[i]
        neigh = get_neighbors(coor, 2, shape)

        # Running mean over the voxel itself and every neighbour that is
        # also part of the voxel list.
        count = 1
        for n in neigh:
            if n in v:
                mean_f = (mean_f*count + profiles[v.index(n)])/(count+1)
                count += 1

        temp[i] = mean_f
    profiles = temp
    print("average connection matrix")

    ###4.spacial distance
    spacedist = ds.cdist(v, v, 'euclidean')

    ###5.correlation matrix
    corr = np.corrcoef(profiles)
    corr = np.abs(corr)

    ###6.mix similariry matrix.
    corr = 0.7*corr + 0.3/(spacedist+1)
    print("mix up the corr and spacial matrix")

    ###7.spectral segmentation
    print("do segmentation")
    cnum = 3
    near = 100
    # Keyword arguments: current scikit-learn accepts only n_clusters
    # positionally.
    # NOTE(review): the original passed two extra trailing positionals
    # (None, True), presumably the long-removed `k` / `precomputed`
    # parameters; affinity='precomputed' already covers that -- confirm.
    sc = SpectralClustering(n_clusters=cnum, eigen_solver='arpack',
                            random_state=None, n_init=100, gamma=1,
                            affinity='precomputed', n_neighbors=near)
    sc.fit_predict(corr)

    tmpset.samples = sc.labels_+1
    print("Number of voxels:  %s" % (sc.labels_.size,))
    print("Number  of clusters:  %s" % (np.unique(sc.labels_).size,))
    print("Elapsed time:  %s" % (time.time() - st,))

    ###8.save the segmentation result.
    print("save the result to xxx_parcel.nii.gz")
    result = map2nifti(tmpset)
    result.to_filename(outf)
    print(".....Segment end........")

    return True
Example #14
0
def main():
    '''
    Parcellate the seed-mask voxels of 'fdt_matrix2.T.hdf5' by spectral clustering.

    The connectivity profiles are loaded and feature-masked; their absolute
    correlation matrix is blended with an inverse spatial-distance term
    (weights 0.1 and 0.9), displayed, and used as a precomputed affinity for
    a 3-cluster SpectralClustering. Labels (shifted to start at 1) are
    written to "fg_parcel_S0006.nii.gz".
    '''
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    # All prints use the single-argument call form so the output is the
    # same on Python 2 and the syntax is valid on Python 3.
    print(hfilename)
    # load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print("%s %s" % (conn.shape, conn.a))
    # remove some features
    mask = create_mask(conn.samples, 0.5, 1)
    conn_m = mask_feature(conn.samples, mask)
    # One row per voxel; named to avoid shadowing the builtin map().
    profiles = conn_m.T
    print("map:")
    print("%s %s %s" % (profiles.shape, profiles.max(), profiles.min()))

    voxel = np.array(conn.fa.values())
    print(voxel[0])
    v = voxel[0]
    spacedist = ds.cdist(v, v, 'euclidean')
    print(spacedist)

    # Disabled alternative: build the similarity matrix with
    # create_similarity_mat(map, conn.fa, 0.1, 2) instead of correlations.

    corr = np.corrcoef(profiles)
    corr = np.abs(corr)
    # Blend functional similarity with spatial proximity.
    corr = 0.1 * corr + 0.9 / (spacedist + 1)

    print("Elaspsed time:  %s" % (time.time() - st,))
    print("%s %s" % (corr.shape, corr))
    plt.imshow(corr, interpolation='nearest', cmap=cm.jet)
    plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()

    cnum = 3
    near = 100
    # Keyword arguments: current scikit-learn accepts only n_clusters
    # positionally.
    # NOTE(review): the original passed two extra trailing positionals
    # (None, True), presumably the long-removed `k` / `precomputed`
    # parameters; affinity='precomputed' already covers that -- confirm.
    sc = SpectralClustering(n_clusters=cnum, eigen_solver='arpack',
                            random_state=None, n_init=100, gamma=1,
                            affinity='precomputed', n_neighbors=near)
    sc.fit_predict(corr)
    # Disabled alternative: affinity='nearest_neighbors' fitted on the
    # profiles themselves (sc.fit(map)).
    tmpset.samples = sc.labels_ + 1
    print("Elaspsed time:  %s" % (time.time() - st,))
    print("Number of voxels:  %s" % (sc.labels_.size,))
    print("Number  of clusters:  %s" % (np.unique(sc.labels_).size,))

    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print(".....The end........")