def execute(self, dataset):
    """Cluster ``dataset[0]`` with SpectralClustering and show a 2-D scatter plot.

    All clustering hyper-parameters are read from the instance's configured
    attributes (``n_clusters``, ``eigen_solver``, ...).  The input matrix is
    standardised first; only the first two feature columns are plotted.

    Parameters
    ----------
    dataset : sequence
        ``dataset[0]`` is the (n_samples, n_features) matrix to cluster.
    """
    X = dataset[0]
    # Zero-mean / unit-variance scaling; spectral affinities are scale sensitive.
    X = StandardScaler().fit_transform(X)
    clf = SpectralClustering(n_clusters=self.n_clusters,
                             eigen_solver=self.eigen_solver,
                             random_state=self.random_state,
                             n_init=self.n_init,
                             gamma=self.gamma,
                             affinity=self.affinity,
                             n_neighbors=self.n_neighbors,
                             eigen_tol=self.eigen_tol,
                             assign_labels=self.assign_labels,
                             degree=self.degree,
                             coef0=self.coef0,
                             n_jobs=self.n_jobs)
    y = clf.fit_predict(X)
    # One distinct colour per discovered cluster, sampled evenly from gist_ncar.
    n_labels = len(set(y))
    colors = ListedColormap(
        [plt.get_cmap("gist_ncar")(each) for each in np.linspace(0, 1, n_labels)])
    X0, X1 = X[:, 0], X[:, 1]
    plt.clf()
    # Reuse X0/X1 (the original re-sliced X[:, 0] / X[:, 1] a second time here).
    plt.scatter(X0, X1, c=y, cmap=colors, s=20, edgecolors='k')
    plt.xlim(X0.min() - 0.5, X0.max() + 0.5)
    plt.ylim(X1.min() - 0.5, X1.max() + 0.5)
    plt.title('Spectral Clustering')
    plt.show()
def get_feature_clusters(df, label_column, idx2colname, n_clusters=13):
    """Group the columns of *df* into clusters of mutually correlated features.

    Spectral clustering is run on the absolute column-correlation matrix,
    which is treated as a precomputed affinity (symmetric, non-negative).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature matrix; *label_column* is dropped first if present.
    label_column : str
        Name of the target column to exclude from the correlation.
    idx2colname : mapping of int -> str
        Maps a column index of *df* to its column name.
    n_clusters : int, optional
        Number of feature clusters to produce (default 13).

    Returns
    -------
    (list of list of str, list of numpy.ndarray)
        Per-cluster column names and the matching column-index arrays.
    """
    if label_column in df.columns:
        df = df.drop([label_column], axis=1)
    clusterer = SpectralClustering(n_clusters=n_clusters,
                                   affinity='precomputed',
                                   random_state=346345)
    cluster_argindices = clusterer.fit_predict(np.abs(df.corr()))
    cluster_indices = [np.where(cluster_argindices == cluster_idx)[0]
                       for cluster_idx in range(n_clusters)]
    # Materialise as a list of lists: the original returned a lazy `map`
    # object that could only be iterated once.
    name_clusters = [[idx2colname[i] for i in indices] for indices in cluster_indices]
    return name_clusters, cluster_indices
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    """Build a weighted NetworkX graph from mutual information and contacts.

    An edge (resid_i, resid_j) is kept when the mutual information exceeds
    *mi_threshold* AND the pair is in contact for more than *time_treshold*
    of the frames.  Current-flow betweenness and a spectral-cluster label are
    attached to each node; the result is stored in ``self.graph`` (and the
    thresholded MI matrix in ``self.graph_array``).

    Parameters
    ----------
    datacontacts : object
        Contact metric with ``dat`` (per-trajectory arrays) and
        ``description.atomIndexes`` — project type, see caller.
    mi_threshold : float
        Minimum mutual information for an edge.
    time_treshold : float, optional
        Minimum fraction of frames a contact must persist (default 0.6).

    Raises
    ------
    Exception
        If the protein length does not match the MI data.
    """
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')
    # Stack per-trajectory contact data into one (frames x pairs) array.
    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        # Number of frames in which pair i is in contact.
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter

    # Keep MI only where it is significant AND the contact persists long enough.
    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]

    # Collect strict-upper-triangle non-zero entries as (source, target, weight).
    intermed = []
    for source in range(self.graph_array.shape[0]):
        for target in range(source + 1, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0:
                intermed.append([int(self.resids[source]), int(self.resids[target]),
                                 float(self.graph_array[source, target])])

    import pandas as pd
    import networkx as nx
    # sklearn.cluster.spectral was a private module removed in scikit-learn 0.24;
    # the public path is sklearn.cluster.
    from sklearn.cluster import SpectralClustering

    # Do not rebind `pd` (the original shadowed the pandas module here).
    edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    edges[['source', 'target']] = edges[['source', 'target']].astype(int)
    edges['weight'] = edges['weight'].astype(float)
    G = nx.from_pandas_edgelist(edges, 'source', 'target', 'weight')

    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Residues that take part in at least one edge.  The original tested
    # np.any(df.loc[...].index), which is falsy when the only matching row
    # index is 0 and silently dropped that residue's Segment attribute.
    present = set(edges['source']).union(edges['target'])
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if key in present}
    nx.set_node_attributes(G, seg_res_dict, 'Segment')

    ## keep only the largest connected component for the flow computation
    if not nx.is_connected(G):
        # nx.connected_component_subgraphs was removed in NetworkX 2.4.
        G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, flow_cent, 'flowcent')

    # Spectral clustering of the full thresholded-MI matrix; labels are only
    # attached to residues that survived as graph nodes.
    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array).astype(float)
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, spectral_dict, 'spectral')
    self.graph = G
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    # Build self.graph: residues are nodes; an edge (i, j) carries the mutual
    # information when it exceeds mi_threshold AND the pair is in contact for
    # more than time_treshold of the frames.  Flow betweenness and a spectral
    # cluster label are attached per node.
    # NOTE(review): this variant targets the NetworkX 1.x API
    # (from_pandas_dataframe, set_node_attributes(G, name, values)) and the
    # old sklearn.cluster.spectral import path; it will not run on
    # NetworkX >= 2.0 or scikit-learn >= 0.24 -- confirm the pinned versions.
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')
    # Stack per-trajectory contact data into one (frames x pairs) array.
    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        # Number of frames in which pair i is in contact.
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter
    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    # Keep MI only where significant and the contact persists long enough.
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]
    intermed = []
    # Collect strict-upper-triangle non-zero entries as (source, target, weight).
    for source in range(self.graph_array.shape[0]):
        for target in range(source, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0 and target > source:
                intermed.append(
                    [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
    import pandas as pd
    import networkx as nx
    from sklearn.cluster.spectral import SpectralClustering
    # NOTE(review): rebinding `pd` to the DataFrame shadows the pandas module
    # for the rest of this function.
    pd = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    # NOTE(review): type('int', (int,), {}) creates an anonymous int subclass;
    # the effect is the same as astype(int)/astype(float) here.
    pd[['source', 'target']] = pd[['source', 'target']].astype(type('int', (int,), {}))
    pd['weight'] = pd['weight'].astype(type('float', (float,), {}))
    G = nx.from_pandas_dataframe(pd, 'source', 'target', ['weight'])
    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Attach the segment id to every residue that appears in at least one edge.
    # NOTE(review): np.any over the matching row *index values* is falsy when
    # the only match is row 0, so that residue is skipped -- likely a bug.
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if np.any(pd.loc[(pd['source'] == key)].index) or np.any(pd.loc[(pd['target'] == key)].index)}
    nx.set_node_attributes(G, 'Segment', seg_res_dict)
    ## set
    if not nx.is_connected(G):
        # Keep only the largest connected component (NetworkX 1.x helper).
        G = max(nx.connected_component_subgraphs(G), key=len)
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, 'flowcent', flow_cent)
    # Spectral clustering of the full thresholded-MI matrix; labels are only
    # attached to residues that survived as graph nodes.
    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array)
    model = model.astype(type('float', (float,), {}))
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, 'spectral', spectral_dict)
    self.graph = G
def main():
    """Spectral clustering of a seed-region connectivity matrix.

    Python 2 script.  Loads a voxel-by-target connectivity profile from an
    HDF5 file, mixes a correlation similarity with a spatial-proximity term,
    clusters the voxels spectrally, and writes the parcellation to a NIfTI
    file.  Relies on module-level names (Dataset, open_conn_mat, create_mask,
    mask_feature, map2nifti, np, ds, plt, pl, cm, time) -- presumably
    PyMVPA-style helpers defined elsewhere in this file; confirm before reuse.
    """
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    print hfilename
    #load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print conn.shape,conn.a
    #remove some features
    mask = create_mask(conn.samples,0.5,1)
    # print mask,mask.shape
    conn_m = mask_feature(conn.samples,mask)
    # print conn_m
    # NOTE(review): `map` shadows the builtin; rows become voxels after the
    # transpose.
    map = conn_m.T
    print "map:"
    print map.shape,map.max(),map.min()
    # Voxel coordinates come from the dataset's feature attributes.
    voxel = np.array(conn.fa.values())
    print voxel[0]
    v = voxel[0]
    # Pairwise Euclidean distance between voxel coordinates.
    spacedist = ds.cdist(v,v,'euclidean')
    print spacedist
    """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """
    # Mixed similarity: 0.1 * |correlation| + 0.9 * spatial proximity.
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    corr = 0.1*corr + 0.9/(spacedist+1)
    print "Elaspsed time: ", time.time() - st
    print corr.shape,corr
    plt.imshow(corr,interpolation='nearest',cmap=cm.jet)
    cb = plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()
    cnum = 3
    near = 100
    # NOTE(review): positional args to SpectralClustering; in the era's
    # signature these map to (n_clusters, eigen_solver, random_state, n_init,
    # gamma, affinity, n_neighbors, eigen_tol, assign_labels).  The trailing
    # True for assign_labels looks suspect -- confirm against the pinned
    # scikit-learn version.
    sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True)
    #sc.fit(map)
    sc.fit_predict(corr)
    ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) '''
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_+1
    # print sc.affinity_matrix_
    #print list(sc.labels_)
    print "Elaspsed time: ", time.time() - st
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print ".....The end........"
def spectral_seg(hfilename,outf):
    """Spectral segmentation of a connectivity profile into parcels.

    Python 2 function.  Loads a voxel-by-target connectivity matrix from
    *hfilename*, smooths each voxel's profile over its spatial neighbors,
    mixes correlation and spatial-proximity similarities, runs spectral
    clustering and saves the 1-based parcel labels to *outf* (NIfTI).

    Relies on module-level names (Dataset, h5load, create_mask,
    get_neighbors, map2nifti, np, ds, time) -- presumably PyMVPA-style
    helpers defined elsewhere in this file; confirm before reuse.

    Returns True on completion.
    """
    tmpset = Dataset([])
    #pdb.set_trace()
    print "hdf name:",hfilename
    st = time.time()
    ###1.load connectivity profile of seed mask voxels
    conn = h5load(hfilename)
    tmpset.a = conn.a
    print "connection matrix shape:"
    print conn.shape
    ###2.features select
    mask = create_mask(conn.samples,5)
    conn_m = conn.samples[mask]
    # NOTE(review): `map` shadows the builtin; rows are voxels after transpose.
    map = conn_m.T
    print "masked conn matrix:"
    print map.shape,map.max(),map.min()
    ###3.average the connection profile.
    temp = np.zeros(map.shape)
    voxel = np.array(conn.fa.values())
    v = voxel[0]
    v = v.tolist()
    # assumes coordinates live in a 256^3 volume -- TODO confirm.
    shape = [256,256,256]
    i = 0
    for coor in v:
        mean_f = map[i]
        #print mean_f.shape
        #plt.plot(mean_f)
        #plt.show()
        neigh =get_neighbors(coor,2,shape)
        #print "neigh:",neigh
        # Running average of the profile over in-mask neighbors.
        # NOTE(review): `n in v` / `v.index(n)` are linear scans per neighbor,
        # so this loop is roughly O(voxels^2).
        count = 1
        for n in neigh:
            if n in v:
                mean_f = (mean_f*count + map[v.index(n)])/(count+1)
                count+=1
        temp[i] = mean_f
        i+=1
    #sys.exit(0)
    map = temp
    print "average connection matrix"
    ###4.spacial distance
    spacedist = ds.cdist(v,v,'euclidean')
    #print spacedist
    ###5.correlation matrix
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    ###6.mix similariry matrix.
    # Mixed similarity: 0.7 * |correlation| + 0.3 * spatial proximity.
    corr = 0.7*corr + 0.3/(spacedist+1)
    #plt.imshow(corr,interpolation='nearest',cmap=cm.jet)
    #cb = plt.colorbar()
    #pl.xticks(())
    #pl.yticks(())
    #pl.show()
    print "mix up the corr and spacial matrix"
    #sys.exit(0)
    ###7.spectral segmentation
    print "do segmentation"
    cnum = 3
    near = 100
    # NOTE(review): positional SpectralClustering args; the trailing True
    # lands on assign_labels in the era's signature -- confirm against the
    # pinned scikit-learn version.
    sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True)
    sc.fit_predict(corr)
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_+1
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    print "Elapsed time: ", time.time() - st
    ###8.save the segmentation result.
    print "save the result to xxx_parcel.nii.gz"
    result = map2nifti(tmpset)
    result.to_filename(outf)
    print ".....Segment end........"
    return True
def main():
    """Spectral clustering of a seed-region connectivity matrix.

    Python 2 script (PEP8-spaced duplicate of the earlier main()).  Loads a
    voxel-by-target connectivity profile from HDF5, mixes a correlation
    similarity with a spatial-proximity term, clusters the voxels spectrally,
    and writes the parcellation to a NIfTI file.  Relies on module-level
    names (Dataset, open_conn_mat, create_mask, mask_feature, map2nifti, np,
    ds, plt, pl, cm, time) -- presumably PyMVPA-style helpers defined
    elsewhere in this file; confirm before reuse.
    """
    st = time.time()
    tmpset = Dataset([])
    # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id)
    hfilename = 'fdt_matrix2.T.hdf5'
    print hfilename
    #load connectivity profile of seed mask voxels
    conn = open_conn_mat(hfilename)
    tmpset.a = conn.a
    print conn.shape, conn.a
    #remove some features
    mask = create_mask(conn.samples, 0.5, 1)
    # print mask,mask.shape
    conn_m = mask_feature(conn.samples, mask)
    # print conn_m
    # NOTE(review): `map` shadows the builtin; rows become voxels after the
    # transpose.
    map = conn_m.T
    print "map:"
    print map.shape, map.max(), map.min()
    # Voxel coordinates come from the dataset's feature attributes.
    voxel = np.array(conn.fa.values())
    print voxel[0]
    v = voxel[0]
    # Pairwise Euclidean distance between voxel coordinates.
    spacedist = ds.cdist(v, v, 'euclidean')
    print spacedist
    """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """
    # Mixed similarity: 0.1 * |correlation| + 0.9 * spatial proximity.
    corr = np.corrcoef(map)
    corr = np.abs(corr)
    corr = 0.1 * corr + 0.9 / (spacedist + 1)
    print "Elaspsed time: ", time.time() - st
    print corr.shape, corr
    plt.imshow(corr, interpolation='nearest', cmap=cm.jet)
    cb = plt.colorbar()
    pl.xticks(())
    pl.yticks(())
    pl.show()
    cnum = 3
    near = 100
    # NOTE(review): positional SpectralClustering args; in the era's signature
    # these map to (n_clusters, eigen_solver, random_state, n_init, gamma,
    # affinity, n_neighbors, eigen_tol, assign_labels).  The trailing True for
    # assign_labels looks suspect -- confirm against the pinned scikit-learn
    # version.
    sc = SpectralClustering(cnum, 'arpack', None, 100, 1, 'precomputed', near, None, True)
    #sc.fit(map)
    sc.fit_predict(corr)
    ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) '''
    # Shift labels to 1-based so 0 can stay the NIfTI background value.
    tmpset.samples = sc.labels_ + 1
    # print sc.affinity_matrix_
    #print list(sc.labels_)
    print "Elaspsed time: ", time.time() - st
    print "Number of voxels: ", sc.labels_.size
    print "Number of clusters: ", np.unique(sc.labels_).size
    result = map2nifti(tmpset)
    result.to_filename("fg_parcel_S0006.nii.gz")
    print ".....The end........"