def test_move_nodes(self):
    """Check that move_nodes puts a complete graph into one community.

    Uses a CPM partition with resolution 0.5 on the full graph of 100
    vertices and expects a single community of size 100 afterwards.
    """
    graph = ig.Graph.Full(100)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)

    self.optimiser.move_nodes(
        partition,
        consider_comms=leidenalg.ALL_NEIGH_COMMS,
    )

    expected_sizes = [100]
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="CPMVertexPartition(resolution_parameter=0.5) of complete graph after move nodes incorrect.",
    )
def test_optimiser(self):
    """Check that optimisation recovers the components of a forest.

    The graph is the disjoint union of ten undirected 10-vertex trees; with
    a CPM resolution of 0 the test expects ten communities of size 10.
    """
    trees = (
        ig.Graph.Tree(10, 3, mode=ig.TREE_UNDIRECTED) for _ in range(10)
    )
    forest = reduce(ig.Graph.disjoint_union, trees)
    partition = leidenalg.CPMVertexPartition(forest, resolution_parameter=0)

    self.optimiser.consider_comms = leidenalg.ALL_NEIGH_COMMS
    self.optimiser.optimise_partition(partition)

    expected_sizes = [10] * 10
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="After optimising partition failed to find different components with CPMVertexPartition(resolution_parameter=0)",
    )
def test_merge_nodes_with_max_comm_size(self):
    """Check merge_nodes on a complete graph when community size is capped.

    With max_comm_size set to 17 on the full graph of 100 vertices, the
    test expects five communities of 17 and one of 15.
    """
    graph = ig.Graph.Full(100)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)

    self.optimiser.max_comm_size = 17
    self.optimiser.merge_nodes(
        partition,
        consider_comms=leidenalg.ALL_NEIGH_COMMS,
    )

    expected_sizes = [17, 17, 17, 17, 17, 15]
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes (max_comm_size=17) incorrect.",
    )
def fit(self):
    '''Compute communities from a matrix with fixed nodes

    The first ``len(self.annotations)`` vertices of ``self.graph`` are
    treated as fixed: they start in the community of their annotation and
    are passed to the optimiser as fixed nodes. Every other vertex starts
    as a singleton community.

    Returns:
        None, but the membership attribute is set as an array of str with
        size N - n_fixed with the community/cluster membership of all
        columns except the first n fixed ones. Communities whose index
        corresponds to an annotation are reported with that annotation;
        all others are reported as the stringified community index.

    Raises:
        ValueError: if self.metric is neither 'cpm' nor 'modularity'.
    '''
    self._parse_graph()

    annotations = self.annotations
    n_fixed = len(annotations)
    graph = self.graph
    n_nodes = graph.vcount()

    opt = leidenalg.Optimiser()
    fixed_nodes = [int(i < n_fixed) for i in range(n_nodes)]

    # NOTE: initial membership is singletons except for atlas nodes, which
    # get the membership they have.
    unique_types = list(np.unique(annotations))
    n_types = len(unique_types)
    # Dict lookup instead of repeated list.index() scans.
    type_index = {ct: i for i, ct in enumerate(unique_types)}
    initial_membership = [
        type_index[annotations[j]] if j < n_fixed else n_types + (j - n_fixed)
        for j in range(n_nodes)
    ]

    if self.metric == 'cpm':
        partition = leidenalg.CPMVertexPartition(
            graph,
            resolution_parameter=self.resolution_parameter,
            initial_membership=initial_membership,
        )
    elif self.metric == 'modularity':
        partition = leidenalg.ModularityVertexPartition(
            graph,
            resolution_parameter=self.resolution_parameter,
            initial_membership=initial_membership,
        )
    else:
        raise ValueError('clustering_metric not understood: {:}'.format(
            self.metric))

    # Run modified Leiden here
    opt.optimise_partition(partition, fixed_nodes=fixed_nodes)

    # Extract result
    membership = partition.membership[n_fixed:]

    # Convert the known cell types in a single pass. The array width is
    # inferred from the final labels: sizing it from the annotation names
    # alone would silently truncate numeric labels with more digits than
    # the longest name, and replacing labels one by one in place could
    # re-map a label that had already been converted.
    labels = [
        str(unique_types[m]) if m < n_types else str(m)
        for m in membership
    ]
    self.membership = np.array(labels, dtype=str)
def test_optimiser_with_max_comm_size(self):
    """Check optimise_partition on a complete graph with a size cap.

    With CPM resolution 0 and max_comm_size set to 10, the full graph of
    100 vertices is expected to end up as ten communities of size 10.
    """
    graph = ig.Graph.Full(100)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0)

    self.optimiser.consider_comms = leidenalg.ALL_NEIGH_COMMS
    self.optimiser.max_comm_size = 10
    self.optimiser.optimise_partition(partition)

    expected_sizes = [10] * 10
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="After optimising partition (max_comm_size=10) failed to find different components with CPMVertexPartition(resolution_parameter=0)",
    )
def test_neg_weight_bipartite(self):
    """Check that negative weights recover the two sides of a bipartite graph.

    All edges of the complete bipartite graph (50 + 50 vertices) get weight
    -0.1 and the CPM resolution is -0.1; two communities of size 50 are
    expected after optimisation over all communities.
    """
    graph = ig.Graph.Full_Bipartite(50, 50)
    graph.es['weight'] = -0.1
    partition = leidenalg.CPMVertexPartition(
        graph,
        resolution_parameter=-0.1,
        weights='weight',
    )

    self.optimiser.consider_comms = leidenalg.ALL_COMMS
    self.optimiser.optimise_partition(partition)

    expected_sizes = [50] * 2
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="After optimising partition failed to find bipartite structure with CPMVertexPartition(resolution_parameter=-0.1)",
    )
def test_diff_move_node_optimality(self):
    """Check node optimality once move_nodes no longer improves anything.

    After repeating move_nodes until it stops returning a positive value,
    moving any vertex to the community of one of its neighbours must not
    improve the partition by more than a rounding tolerance.
    """
    graph = ig.Graph.Erdos_Renyi(
        100, p=5. / 100, directed=False, loops=False)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.1)

    # Keep moving nodes for as long as a pass reports an improvement.
    while self.optimiser.move_nodes(
            partition, consider_comms=leidenalg.ALL_NEIGH_COMMS) > 0:
        pass

    tolerance = 1e-10  # Allow for a small difference up to rounding error.
    for vertex in graph.vs:
        neighbour_comms = {
            partition.membership[nb.index] for nb in vertex.neighbors()
        }
        for comm in neighbour_comms:
            improvement = partition.diff_move(vertex.index, comm)
            self.assertLessEqual(
                improvement,
                tolerance,
                msg="Was able to move a node to a better community, violating node optimality.",
            )
def test_merge_nodes(self):
    """Check that merge_nodes puts a complete graph into one community.

    Also verifies that the partition's total internal weight equals the
    number of edges of the graph.
    """
    graph = ig.Graph.Full(100)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)

    self.optimiser.merge_nodes(
        partition,
        consider_comms=leidenalg.ALL_NEIGH_COMMS,
    )

    expected_sizes = [100]
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes incorrect.",
    )

    internal_weight = partition.total_weight_in_all_comms()
    self.assertEqual(
        internal_weight,
        graph.ecount(),
        msg="total_weight_in_all_comms not equal to ecount of graph.",
    )
def test_move_nodes_with_fixed(self):
    """Check that move_nodes leaves fixed vertices where they are.

    The graph has a single edge (0, 2) plus vertex 1; both endpoints of the
    edge are marked as fixed, so three singleton communities are expected.
    """
    # One edge plus singleton, but the two connected nodes are fixed
    graph = ig.Graph([(0, 2)])
    fixed_flags = [True, False, True]
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.1)

    self.optimiser.move_nodes(
        partition,
        is_membership_fixed=fixed_flags,
        consider_comms=leidenalg.ALL_NEIGH_COMMS,
    )

    expected_sizes = [1, 1, 1]
    self.assertListEqual(
        partition.sizes(),
        expected_sizes,
        msg="CPMVertexPartition(resolution_parameter=0.1) of one edge plus singleton after move nodes with fixed nodes is incorrect.",
    )
def test_optimiser_split_with_max_comm_size(self):
    """Check that a size cap splits an already merged community.

    First merge_nodes is expected to produce one community of 100 on the
    complete graph; after setting max_comm_size to 10, optimise_partition
    is expected to yield ten communities of size 10.
    """
    graph = ig.Graph.Full(100)
    partition = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)

    # Step 1: merge without any cap.
    self.optimiser.merge_nodes(
        partition,
        consider_comms=leidenalg.ALL_NEIGH_COMMS,
    )
    merged_sizes = [100]
    self.assertListEqual(
        partition.sizes(),
        merged_sizes,
        msg="CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes incorrect.",
    )

    # Step 2: optimise again with the cap in place.
    self.optimiser.max_comm_size = 10
    self.optimiser.optimise_partition(partition)
    capped_sizes = [10] * 10
    self.assertListEqual(
        partition.sizes(),
        capped_sizes,
        msg="After optimising partition (max_comm_size=10) failed to find different components with CPMVertexPartition(resolution_parameter=0.5)",
    )
def test_optimiser_with_fixed_nodes(self):
    """Check optimisation with a fixed vertex via the fixed_nodes argument.

    On the complete graph of three vertices starting from membership
    [2, 1, 0] with vertex 0 fixed, all vertices are expected to end up in
    community 2.
    """
    graph = ig.Graph.Full(3)
    start_membership = [2, 1, 0]
    partition = leidenalg.CPMVertexPartition(
        graph,
        resolution_parameter=0.01,
        initial_membership=start_membership,
    )

    optimiser = leidenalg.Optimiser()
    fixed_flags = [True, False, False]
    optimiser.optimise_partition(partition, fixed_nodes=fixed_flags)

    expected_membership = [2, 2, 2]
    self.assertListEqual(
        partition.membership,
        expected_membership,
        msg="After optimising partition with fixed nodes failed to recover initial fixed memberships",
    )
def test_optimiser_with_is_membership_fixed(self):
    """Check optimisation with a fixed vertex via is_membership_fixed.

    Verifies that the value returned by optimise_partition matches the
    actual change in partition quality, and that all three vertices of the
    complete graph end up in the community of the fixed vertex (2).
    """
    graph = ig.Graph.Full(3)
    start_membership = [2, 1, 0]
    partition = leidenalg.CPMVertexPartition(
        graph,
        resolution_parameter=0.01,
        initial_membership=start_membership,
    )

    fixed_flags = [True, False, False]
    quality_before = partition.quality()
    reported_gain = self.optimiser.optimise_partition(
        partition,
        is_membership_fixed=fixed_flags,
    )

    actual_gain = partition.quality() - quality_before
    self.assertAlmostEqual(
        actual_gain,
        reported_gain,
        places=10,
        msg="Optimisation with fixed nodes returns inconsistent quality",
    )

    expected_membership = [2, 2, 2]
    self.assertListEqual(
        partition.membership,
        expected_membership,
        msg="After optimising partition with fixed nodes failed to recover initial fixed memberships",
    )
fn_anno = '../../data/sequencing/{:}-mRNA/samplesheet_with_Leiden_community_and_coverage.tsv'.format( sn) if os.path.isfile(fn_anno): print('Load clusters from file') ds.samplesheet['community'] = pd.read_csv(fn_anno, sep='\t', index_col=0)['community'] else: print('Unsupervised clustering') import igraph as ig sys.path.insert(0, os.path.abspath('../../packages/')) import leidenalg G = ig.Graph(edges=edges) partition = partition = leidenalg.CPMVertexPartition( G, resolution_parameter=0.01, ) opt = leidenalg.Optimiser() opt.optimise_partition(partition) communities = partition.membership print('n. communities: {:}'.format(len(np.unique(communities)))) ds.samplesheet['community'] = communities print('Unsupervised clustering, rough') import igraph as ig sys.path.insert(0, os.path.abspath('../../packages/')) import leidenalg G = ig.Graph(edges=edges) partition = partition = leidenalg.CPMVertexPartition( G, resolution_parameter=0.002,
def compute_communities(self):
    '''Compute communities from a matrix with fixed nodes

    A graph is built from ``self.neighbors`` (one list of neighbor indices
    per column of ``self.matrix``). The first ``self.n_fixed`` columns are
    fixed: they start in the community of their entry in
    ``self.cell_types`` and are passed to the optimiser as fixed nodes.
    Every other column starts as a singleton community.

    Returns:
        None, but SemiAnnotate.membership is set as an array of str with
        size N - n_fixed with the community/cluster membership of all
        columns except the first n_fixed ones. Communities whose index
        corresponds to a known cell type are reported with that cell type;
        all others are reported as the stringified community index.

    Raises:
        ImportError: if the installed leidenalg does not accept the
            ``fixed_nodes`` argument in ``Optimiser.optimise_partition``.
        ValueError: if self.clustering_metric is neither 'cpm' nor
            'modularity'.
    '''
    import inspect
    import igraph as ig
    import leidenalg

    # Check whether this version of Leiden has fixed nodes support
    opt = leidenalg.Optimiser()
    sig = inspect.getfullargspec(opt.optimise_partition)
    if 'fixed_nodes' not in sig.args:
        raise ImportError(
            'This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version'
        )

    matrix = self.matrix
    aa = self.cell_types
    n_fixed = self.n_fixed
    clustering_metric = self.clustering_metric
    resolution_parameter = self.resolution_parameter
    neighbors = self.neighbors

    _, N = matrix.shape

    # np.unique returns an ndarray, which has no .index(); keep the unique
    # cell types as a list and look positions up through a dict.
    aau = list(np.unique(aa))
    aaun = len(aau)
    type_index = {ct: i for i, ct in enumerate(aau)}

    # Construct graph from the lists of neighbors. Each edge is stored as a
    # frozenset so that (i, n) and (n, i) collapse into one undirected edge.
    edges_d = {
        frozenset((i, n))
        for i, neis in enumerate(neighbors)
        for n in neis
    }
    edges = [tuple(e) for e in edges_d]
    g = ig.Graph(n=N, edges=edges, directed=False)

    # NOTE: initial membership is singletons except for atlas nodes, which
    # get the membership they have.
    initial_membership = [
        type_index[aa[j]] if j < n_fixed else aaun + (j - n_fixed)
        for j in range(N)
    ]

    # Compute communities with semi-supervised Leiden
    if clustering_metric == 'cpm':
        partition = leidenalg.CPMVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=initial_membership,
        )
    elif clustering_metric == 'modularity':
        partition = leidenalg.ModularityVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=initial_membership,
        )
    else:
        raise ValueError('clustering_metric not understood: {:}'.format(
            clustering_metric))

    fixed_nodes = [int(i < n_fixed) for i in range(N)]
    opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
    membership = partition.membership[n_fixed:]

    # Convert the known cell types. The community index refers to the
    # position in the *unique* cell types (aau), matching how the initial
    # membership was built. The conversion is done in one pass and the
    # array width is inferred from the final labels, so that long numeric
    # labels are not truncated to the length of the longest cell type name.
    labels = [
        str(aau[m]) if m < aaun else str(m)
        for m in membership
    ]
    self.membership = np.array(labels, dtype=str)
def leiden(
        self,
        axis,
        edges,
        edge_weights=None,
        metric='cpm',
        resolution_parameter=0.001,
        initial_membership=None,
        fixed_nodes=None,
        ):
    '''Graph-based Leiden clustering

    Args:
        axis (string): It must be 'samples' or 'features'.
            The Dataset.counts matrix is used and either samples or
            features are clustered.
        edges (list of pairs): list of edges to make a graph used to
            cluster. Each member of a pair is an int referring to the
            index of the sample or feature in the sample/featuresheet.
        edge_weights (list of float or None): edge weights to use for
            clustering. If None, all edge weights are 1.
        metric (str): What metric to optimize. Can be 'modularity' or
            'cpm'.
        resolution_parameter (float): a number between 0 and 1 that sets
            how easy it is to call new clusters.
        initial_membership (str or None): name of a metadata column
            containing the initial membership vector for the clustering.
            If None (default), each samples starts as a singleton
        fixed_nodes (str or None): name of a metadata column containing
            a boolean vector for which nodes are not allowed to change
            cluster membership during the Leiden algorithm. Your version
            of leidenalg must support fixed nodes for this feature to
            work.

    Returns:
        pd.Series with the labels of the clusters.

    Raises:
        ValueError: if axis is neither 'samples' nor 'features', or if
            metric is neither 'cpm' nor 'modularity'.
    '''
    import igraph as ig
    import leidenalg

    if axis == 'samples':
        n_nodes = self.dataset.n_samples
        index = self.dataset.samplenames
    elif axis == 'features':
        n_nodes = self.dataset.n_features
        index = self.dataset.featurenames
    else:
        # Without this guard an unsupported axis only failed later with an
        # unrelated unbound-variable error.
        raise ValueError(
            'axis must be "samples" or "features", got: {:}'.format(axis))

    def _column_as_int_list(column):
        # Read a metadata column of the clustered axis as a list of ints.
        if axis == 'samples':
            sheet = self.dataset.samplesheet
        else:
            sheet = self.dataset.featuresheet
        return list(sheet[column].values.astype(int))

    g = ig.Graph(n=n_nodes, edges=edges, directed=False)
    if edge_weights is not None:
        g.es['weight'] = edge_weights

    if initial_membership is not None:
        im = _column_as_int_list(initial_membership)
    else:
        # Default: every node starts in its own community.
        im = list(np.arange(n_nodes))

    if metric == 'cpm':
        partition = leidenalg.CPMVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=im,
        )
    elif metric == 'modularity':
        partition = leidenalg.ModularityVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=im,
        )
    else:
        raise ValueError(
            'clustering_metric not understood: {:}'.format(metric))

    opt = leidenalg.Optimiser()
    if fixed_nodes is not None:
        # Only pass fixed_nodes when requested, so that leidenalg versions
        # without that argument keep working for the default case.
        fxn = _column_as_int_list(fixed_nodes)
        opt.optimise_partition(partition, fixed_nodes=fxn)
    else:
        opt.optimise_partition(partition)

    communities = partition.membership
    labels = pd.Series(communities, index=index)
    return labels
def cluster_graph(self):
    '''Compute communities from a matrix with fixed nodes

    The first ``self.n_fixed`` columns of ``self.matrix`` are atlas
    entries; column ``i`` of them is expanded into ``self.sizes[i]`` graph
    vertices that all start in community ``i`` and are passed to the
    optimiser as fixed nodes. Every remaining column contributes one vertex
    that starts in its own community.

    Returns:
        None, but Averages.membership is set as an array of str with one
        entry per non-fixed vertex of the graph. Communities whose index
        corresponds to an entry of ``self.cell_types`` are reported with
        that cell type; all others are reported as the stringified
        community index.

    Raises:
        ImportError: if the installed leidenalg does not accept the
            ``fixed_nodes`` argument in ``Optimiser.optimise_partition``.
        ValueError: if the expanded initial membership does not have
            ``sum(self.sizes)`` entries, or if self.clustering_metric is
            neither 'cpm' nor 'modularity'.
    '''
    import inspect
    import leidenalg

    # Check whether this version of Leiden has fixed nodes support
    opt = leidenalg.Optimiser()
    sig = inspect.getfullargspec(opt.optimise_partition)
    if 'fixed_nodes' not in sig.args:
        raise ImportError('This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version')

    matrix = self.matrix
    sizes = self.sizes
    n_fixed = self.n_fixed
    clustering_metric = self.clustering_metric
    resolution_parameter = self.resolution_parameter
    g = self.graph

    _, N = matrix.shape
    # Number of expanded (per-vertex) fixed nodes and of all vertices.
    n_fixede = int(np.sum(sizes[:n_fixed]))
    Ne = int(np.sum(sizes))

    # NOTE: initial membership is singletons except for atlas nodes, which
    # get the membership they have.
    initial_membership = []
    for isi in range(N):
        if isi < n_fixed:
            initial_membership.extend([isi] * int(sizes[isi]))
        else:
            initial_membership.append(isi)

    if len(initial_membership) != Ne:
        raise ValueError('initial_membership list has wrong length!')

    # Compute communities with semi-supervised Leiden
    if clustering_metric == 'cpm':
        partition = leidenalg.CPMVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=initial_membership,
        )
    elif clustering_metric == 'modularity':
        partition = leidenalg.ModularityVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=initial_membership,
        )
    else:
        raise ValueError(
            'clustering_metric not understood: {:}'.format(clustering_metric))

    fixed_nodes = [int(i < n_fixede) for i in range(Ne)]
    opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
    membership = partition.membership[n_fixede:]

    # Convert the known cell types in a single pass. The array width is
    # inferred from the final labels: sizing it from the cell type names
    # alone would silently truncate numeric labels with more digits than
    # the longest name, and replacing labels one by one in place could
    # re-map a label that had already been converted.
    cell_types = list(self.cell_types)
    n_types = len(cell_types)
    labels = [
        str(cell_types[m]) if m < n_types else str(m)
        for m in membership
    ]
    self.membership = np.array(labels, dtype=str)