def parcellation_ward_spatial(func_data, n_clusters, graph=None):
    """
    Parcellate functional data with scikit-learn Ward hierarchical clustering.

    Inputs:
        - func_data: functional data, array of shape
                     (nb_positions, dim_feature_1, [dim_feature2, ...])
        - n_clusters: total number of clusters to create
        - graph: adjacency list defining the neighbours of each position.
                 If None, no connectivity is used and the clustering is
                 spatially independent.

    Output:
        parcellation labels. Without a graph, the estimator labels are
        shifted by one. With a graph, every connected component is
        clustered separately and its labels are offset so that they do not
        collide with those of previously processed components; positions in
        components of fewer than two nodes are skipped and keep label 0.
    """
    try:
        # sklearn version < 0.17
        from sklearn.cluster import Ward as AgglomerativeClustering
    except ImportError:
        from sklearn.cluster import AgglomerativeClustering
    from pyhrf.graph import graph_to_sparse_matrix, sub_graph

    # No graph: a single, spatially unconstrained clustering of everything.
    if graph is None:
        unconstrained = AgglomerativeClustering(n_clusters=n_clusters)
        unconstrained.fit(func_data)  # connectivity=None
        return unconstrained.labels_ + 1

    labels = np.zeros(len(graph), dtype=np.int32)
    components = connected_components(graph)
    total = len(graph) * 1.

    # Share the cluster budget between components proportionally to their
    # size, granting at least one cluster to each of them (`or 1` replaces a
    # rounded share of exactly zero).
    quotas = np.array(
        [int(np.round(n_clusters * len(comp) / total)) or 1
         for comp in components],
        dtype=np.int32)

    # The component holding the largest share absorbs the rounding error so
    # that the quotas add up to the requested number of clusters.
    biggest = np.argmax(quotas)
    before = sum(quotas[:biggest])
    after = sum(quotas[biggest + 1:])
    quotas[biggest] = n_clusters - before - after
    assert sum(quotas) == n_clusters
    assert (quotas > 0).all()

    sizes_txt = ' ,'.join([str(len(comp)) for comp in components])
    logger.info('Found %d connected components (CC) of sizes: %s',
                len(components), sizes_txt)
    quotas_txt = ' ,'.join(map(str, quotas))
    logger.info('Nb of clusters to search in each CC: %s', quotas_txt)

    for quota, comp in zip(quotas, components):
        # Components with fewer than two positions are not clustered.
        if len(comp) < 2:
            continue

        # Restrict the graph to the component unless it already spans
        # the whole graph.
        if len(comp) < len(graph):
            comp_graph, _ = sub_graph(graph, comp)
        else:
            comp_graph = graph
        comp_connectivity = graph_to_sparse_matrix(comp_graph)
        comp_data = func_data[comp]

        logger.info('Launch spatial Ward (nclusters=%d) on data of shape %s',
                    quota, str(comp_data.shape))
        constrained = AgglomerativeClustering(n_clusters=quota,
                                              connectivity=comp_connectivity)
        constrained.fit(comp_data)

        # Offset by the current maximum so labels stay distinct across
        # components.
        labels[comp] += constrained.labels_ + 1 + labels.max()

    return labels
def parcellation_ward_spatial(func_data, n_clusters, graph=None):
    """
    Make parcellation based upon ward hierarchical clustering from scikit-learn

    Inputs:
        - func_data: functional data: array of shape
                     (nb_positions, dim_feature_1, [dim_feature2, ...])
        - n_clusters: chosen number of clusters to create
        - graph: adjacency list defining neighbours
                 (if None, no connectivity defined: clustering is
                 spatially independent)

    Output:
        parcellation labels. Without a graph, the estimator labels are
        shifted by one. With a graph, each connected component is clustered
        on its own and its labels are offset by the current maximum label so
        that they stay distinct across components. Positions belonging to a
        component of fewer than two nodes are not clustered and keep label 0.

    Raises an AssertionError when the per-component cluster counts cannot be
    made strictly positive while summing to n_clusters.
    """
    try:
        from sklearn.cluster import AgglomerativeClustering
    except ImportError:
        # Older scikit-learn releases that predate AgglomerativeClustering
        # only provide the legacy Ward estimator (which newer releases no
        # longer ship).
        from sklearn.cluster import Ward as AgglomerativeClustering
    from pyhrf.graph import graph_to_sparse_matrix, sub_graph

    if graph is not None:
        labels = np.zeros(len(graph), dtype=np.int32)
        ccs = connected_components(graph)
        n_tot = len(graph) * 1.

        # Distribute the clusters over the connected components
        # proportionally to their size, with at least one cluster each.
        ncs = np.zeros(len(ccs), dtype=np.int32)
        for icc, cc in enumerate(ccs):
            nc = int(np.round(n_clusters * len(cc) / n_tot))
            if nc == 0:
                nc = 1
            ncs[icc] = nc

        # The component with the largest share absorbs the rounding error so
        # that the total matches the requested number of clusters.
        max_nc = np.argmax(ncs)
        ncs[max_nc] = n_clusters - sum(ncs[0:max_nc]) - sum(ncs[max_nc + 1:])

        assert sum(ncs) == n_clusters, \
            'per-component cluster counts do not sum to n_clusters'
        assert (ncs > 0).all(), \
            'n_clusters is too small to give every connected component ' \
            'at least one cluster'

        pyhrf.verbose(1, 'Found %d connected components (CC) of sizes: %s'
                      % (len(ccs), ' ,'.join([str(len(cc)) for cc in ccs])))
        pyhrf.verbose(1, 'Nb of clusters to search in each CC: %s'
                      % ' ,'.join(map(str, ncs)))

        for nc, cc in zip(ncs, ccs):
            # Components with fewer than two positions are skipped; their
            # positions keep label 0.
            if len(cc) < 2:
                continue

            # Only extract a sub-graph when the component does not already
            # cover the whole graph.
            if len(cc) < len(graph):
                cc_graph, _ = sub_graph(graph, cc)
            else:
                cc_graph = graph
            cc_connectivity = graph_to_sparse_matrix(cc_graph)

            cc_data = func_data[cc]
            pyhrf.verbose(2, 'Launch spatial Ward (nclusters=%d) '
                          ' on data of shape %s' % (nc, str(cc_data.shape)))
            # Pass a plain Python int rather than a numpy scalar as
            # n_clusters.
            ward_object = AgglomerativeClustering(
                n_clusters=int(nc), connectivity=cc_connectivity
            ).fit(cc_data)

            # Offset by the current maximum so that labels from different
            # components never collide.
            labels[cc] += ward_object.labels_ + 1 + labels.max()
    else:
        ward_object = AgglomerativeClustering(
            n_clusters=n_clusters
        ).fit(func_data)  # connectivity=None
        labels = ward_object.labels_ + 1

    return labels
def parcellation_ward_spatial(func_data, n_clusters, graph=None):
    """Build a parcellation with scikit-learn's Ward hierarchical clustering.

    Parameters
    ----------
    func_data: array of shape (nb_positions, dim_feature_1, [dim_feature2, ...])
        functional data to cluster
    n_clusters
        total number of clusters to create
    graph
        adjacency list defining neighbours. When None, no connectivity is
        used and the clustering is spatially independent.

    Returns
    -------
    parcellation labels
        Estimator labels shifted by one when no graph is given. With a
        graph, connected components are clustered one by one and their
        labels are offset by the running maximum label; positions of
        components with fewer than two nodes are left at 0.

    """
    try:
        # sklearn version < 0.17
        from sklearn.cluster import Ward as AgglomerativeClustering
    except ImportError:
        from sklearn.cluster import AgglomerativeClustering
    from pyhrf.graph import graph_to_sparse_matrix, sub_graph

    if graph is None:
        # connectivity=None
        model = AgglomerativeClustering(n_clusters=n_clusters)
        labels = model.fit(func_data).labels_ + 1
    else:
        labels = np.zeros(len(graph), dtype=np.int32)
        ccs = connected_components(graph)
        cc_sizes = [len(cc) for cc in ccs]
        n_tot = len(graph) * 1.

        # Number of clusters per component: proportional to the component
        # size, never zero.
        ncs = np.zeros(len(ccs), dtype=np.int32)
        for icc, size in enumerate(cc_sizes):
            share = int(np.round(n_clusters * size / n_tot))
            ncs[icc] = share if share != 0 else 1

        # Rounding leftovers are put on the component with the most clusters.
        i_largest = np.argmax(ncs)
        ncs[i_largest] = (n_clusters
                          - sum(ncs[:i_largest])
                          - sum(ncs[i_largest + 1:]))
        assert sum(ncs) == n_clusters
        assert (ncs > 0).all()

        logger.info(
            'Found %d connected components (CC) of sizes: %s',
            len(ccs), ' ,'.join([str(size) for size in cc_sizes]))
        logger.info(
            'Nb of clusters to search in each CC: %s',
            ' ,'.join([str(nc) for nc in ncs]))

        for nc, cc, size in zip(ncs, ccs, cc_sizes):
            if size < 2:
                # Too small to be clustered: left with label 0.
                continue

            if size >= len(graph):
                # The component is the whole graph: nothing to extract.
                cc_graph = graph
            else:
                cc_graph, _ = sub_graph(graph, cc)
            cc_connectivity = graph_to_sparse_matrix(cc_graph)

            cc_data = func_data[cc]
            logger.info(
                'Launch spatial Ward (nclusters=%d) on data of shape %s',
                nc, str(cc_data.shape))

            clustering = AgglomerativeClustering(
                n_clusters=nc, connectivity=cc_connectivity)
            clustering.fit(cc_data)

            # Shift by the running maximum to keep labels unique across
            # components.
            labels[cc] += clustering.labels_ + 1 + labels.max()

    return labels