def test_move_nodes(self):
   # A complete graph scored with CPM at resolution 0.5 is expected to end
   # up as one community holding all 100 vertices after a move_nodes pass.
   graph = ig.Graph.Full(100)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)
   self.optimiser.move_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   failure = "CPMVertexPartition(resolution_parameter=0.5) of complete graph after move nodes incorrect."
   self.assertListEqual(part.sizes(), [100], msg=failure)
 def test_optimiser(self):
   # Build ten disconnected trees of 10 vertices each; at resolution 0 the
   # optimiser is expected to report exactly one community per tree.
   trees = (ig.Graph.Tree(10, 3, mode=ig.TREE_UNDIRECTED) for _ in range(10))
   forest = reduce(ig.Graph.disjoint_union, trees)
   part = leidenalg.CPMVertexPartition(forest, resolution_parameter=0)
   self.optimiser.consider_comms = leidenalg.ALL_NEIGH_COMMS
   self.optimiser.optimise_partition(part)
   expected_sizes = [10] * 10
   failure = "After optimising partition failed to find different components with CPMVertexPartition(resolution_parameter=0)"
   self.assertListEqual(part.sizes(), expected_sizes, msg=failure)
 def test_merge_nodes_with_max_comm_size(self):
   # With communities capped at 17 vertices, merging the 100 vertices of a
   # complete graph is expected to give five full communities and one of 15.
   graph = ig.Graph.Full(100)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)
   self.optimiser.max_comm_size = 17
   self.optimiser.merge_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   expected_sizes = 5 * [17] + [15]
   failure = "CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes (max_comm_size=17) incorrect."
   self.assertListEqual(part.sizes(), expected_sizes, msg=failure)
Exemple #4
0
    def fit(self):
        '''Compute communities on self.graph while keeping annotated nodes fixed

        The first len(self.annotations) vertices of the graph start in the
        community of their annotation and are flagged as fixed; every other
        vertex starts as a singleton and is free to move.

        Returns:
            None, but the membership attribute is set as an array of str with
            size N - n_fixed holding one label per non-fixed vertex: the
            annotation name when the vertex ended up in one of the annotated
            communities, otherwise the community number as a string.

        Raises:
            ValueError: if self.metric is neither 'cpm' nor 'modularity'.
        '''
        self._parse_graph()

        aa = self.annotations
        n_fixed = len(aa)
        g = self.graph
        N = g.vcount()

        opt = leidenalg.Optimiser()
        fixed_nodes = [int(i < n_fixed) for i in range(N)]

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have (the rank of their annotation among
        # the sorted unique annotations).
        aau = list(np.unique(aa))
        aaun = len(aau)
        type_index = {ct: i for i, ct in enumerate(aau)}
        initial_membership = [
            type_index[aa[j]] if j < n_fixed else aaun + (j - n_fixed)
            for j in range(N)
        ]

        # NOTE(review): upstream leidenalg does not document a
        # resolution_parameter argument for ModularityVertexPartition;
        # confirm the installed version accepts it.
        if self.metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                g,
                resolution_parameter=self.resolution_parameter,
                initial_membership=initial_membership,
            )
        elif self.metric == 'modularity':
            partition = leidenalg.ModularityVertexPartition(
                g,
                resolution_parameter=self.resolution_parameter,
                initial_membership=initial_membership,
            )
        else:
            raise ValueError('clustering_metric not understood: {:}'.format(
                self.metric))

        # Run modified Leiden here
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)

        # Extract result
        membership = partition.membership[n_fixed:]

        # Convert the known cell types. This assumes the optimiser keeps the
        # community numbers of the fixed nodes, so that community i < aaun
        # still means annotation aau[i]. Each label is translated exactly
        # once, so a numeric label can never be renamed twice.
        labels = [str(aau[x]) if x < aaun else str(x) for x in membership]

        # The string width must fit both the annotation names and the numeric
        # labels: a fixed-width numpy string array silently truncates longer
        # entries (e.g. '12' -> '1'), which would mislabel new clusters.
        width = max(
            [len(lb) for lb in labels] + [len(ct) for ct in aau] + [1])
        self.membership = np.array(labels, dtype='U{:}'.format(width))
 def test_optimiser_with_max_comm_size(self):
   # A complete graph of 100 vertices optimised with communities capped at
   # 10 vertices is expected to split into ten communities of ten.
   graph = ig.Graph.Full(100)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0)
   self.optimiser.consider_comms = leidenalg.ALL_NEIGH_COMMS
   self.optimiser.max_comm_size = 10
   self.optimiser.optimise_partition(part)
   expected_sizes = [10] * 10
   failure = "After optimising partition (max_comm_size=10) failed to find different components with CPMVertexPartition(resolution_parameter=0)"
   self.assertListEqual(part.sizes(), expected_sizes, msg=failure)
 def test_neg_weight_bipartite(self):
   # Full bipartite graph (50 + 50) whose edges all weigh -0.1, scored with
   # a CPM resolution of -0.1: the two sides are the expected communities.
   graph = ig.Graph.Full_Bipartite(50, 50)
   graph.es['weight'] = -0.1
   part = leidenalg.CPMVertexPartition(
       graph, resolution_parameter=-0.1, weights='weight')
   self.optimiser.consider_comms = leidenalg.ALL_COMMS
   self.optimiser.optimise_partition(part)
   expected_sizes = [50] * 2
   failure = "After optimising partition failed to find bipartite structure with CPMVertexPartition(resolution_parameter=-0.1)"
   self.assertListEqual(part.sizes(), expected_sizes, msg=failure)
 def test_diff_move_node_optimality(self):
   # Repeat move_nodes on a sparse random graph until it reports no further
   # improvement, then check that no vertex can gain by moving into the
   # community of any of its neighbours.
   graph = ig.Graph.Erdos_Renyi(100, p=5./100, directed=False, loops=False)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.1)
   improvement = self.optimiser.move_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   while 0 < improvement:
     improvement = self.optimiser.move_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   tolerance = 1e-10  # Allow for a small difference up to rounding error.
   failure = "Was able to move a node to a better community, violating node optimality."
   for vertex in graph.vs:
     candidate_comms = {part.membership[nb.index] for nb in vertex.neighbors()}
     for comm in candidate_comms:
       self.assertLessEqual(
         part.diff_move(vertex.index, comm), tolerance, msg=failure)
 def test_merge_nodes(self):
   # Merging nodes of a complete graph (CPM, resolution 0.5) is expected to
   # give a single community whose internal weight equals the edge count.
   graph = ig.Graph.Full(100)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)
   self.optimiser.merge_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   size_failure = "CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes incorrect."
   self.assertListEqual(part.sizes(), [100], msg=size_failure)
   weight_failure = "total_weight_in_all_comms not equal to ecount of graph."
   self.assertEqual(
       part.total_weight_in_all_comms(), graph.ecount(), msg=weight_failure)
 def test_move_nodes_with_fixed(self):
   # Three vertices with a single edge 0-2. Both endpoints of that edge are
   # fixed, only the isolated vertex 1 is free, so every vertex is expected
   # to remain in its own community.
   graph = ig.Graph([(0, 2)])
   fixed_flags = [True, False, True]
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.1)
   self.optimiser.move_nodes(
       part,
       is_membership_fixed=fixed_flags,
       consider_comms=leidenalg.ALL_NEIGH_COMMS)
   failure = "CPMVertexPartition(resolution_parameter=0.1) of one edge plus singleton after move nodes with fixed nodes is incorrect."
   self.assertListEqual(part.sizes(), [1, 1, 1], msg=failure)
 def test_optimiser_split_with_max_comm_size(self):
   # Step 1: without a size cap, merge_nodes is expected to collapse the
   # complete graph into one community of 100 vertices.
   graph = ig.Graph.Full(100)
   part = leidenalg.CPMVertexPartition(graph, resolution_parameter=0.5)
   self.optimiser.merge_nodes(part, consider_comms=leidenalg.ALL_NEIGH_COMMS)
   merge_failure = "CPMVertexPartition(resolution_parameter=0.5) of complete graph after merge nodes incorrect."
   self.assertListEqual(part.sizes(), [100], msg=merge_failure)
   # Step 2: with the cap set to 10, a full optimisation is expected to
   # split that community into ten communities of ten.
   self.optimiser.max_comm_size = 10
   self.optimiser.optimise_partition(part)
   split_failure = "After optimising partition (max_comm_size=10) failed to find different components with CPMVertexPartition(resolution_parameter=0.5)"
   self.assertListEqual(part.sizes(), [10] * 10, msg=split_failure)
Exemple #11
0
 def test_optimiser_with_fixed_nodes(self):
     # Triangle whose vertices start in communities 2, 1 and 0. Only vertex 0
     # is fixed; the two free vertices are expected to join it, so the whole
     # triangle ends up in community 2.
     triangle = ig.Graph.Full(3)
     start_membership = [2, 1, 0]
     part = leidenalg.CPMVertexPartition(
         triangle,
         resolution_parameter=0.01,
         initial_membership=start_membership)
     optimiser = leidenalg.Optimiser()
     fixed_flags = [True, False, False]
     optimiser.optimise_partition(part, fixed_nodes=fixed_flags)
     failure = "After optimising partition with fixed nodes failed to recover initial fixed memberships"
     self.assertListEqual(part.membership, [2, 2, 2], msg=failure)
 def test_optimiser_with_is_membership_fixed(self):
     # Triangle whose vertices start in communities 2, 1 and 0, with only
     # vertex 0 fixed. Two things are checked: the value returned by
     # optimise_partition matches the actual change in quality, and all
     # vertices end up in the fixed vertex's community (2).
     triangle = ig.Graph.Full(3)
     start_membership = [2, 1, 0]
     part = leidenalg.CPMVertexPartition(
         triangle,
         resolution_parameter=0.01,
         initial_membership=start_membership)
     fixed_flags = [True, False, False]
     quality_before = part.quality()
     reported_gain = self.optimiser.optimise_partition(
         part, is_membership_fixed=fixed_flags)
     self.assertAlmostEqual(
         part.quality() - quality_before, reported_gain, places=10,
         msg="Optimisation with fixed nodes returns inconsistent quality")
     failure = "After optimising partition with fixed nodes failed to recover initial fixed memberships"
     self.assertListEqual(part.membership, [2, 2, 2], msg=failure)
Exemple #13
0
    fn_anno = '../../data/sequencing/{:}-mRNA/samplesheet_with_Leiden_community_and_coverage.tsv'.format(
        sn)
    if os.path.isfile(fn_anno):
        print('Load clusters from file')
        ds.samplesheet['community'] = pd.read_csv(fn_anno,
                                                  sep='\t',
                                                  index_col=0)['community']
    else:
        print('Unsupervised clustering')
        import igraph as ig
        sys.path.insert(0, os.path.abspath('../../packages/'))
        import leidenalg
        G = ig.Graph(edges=edges)
        partition = partition = leidenalg.CPMVertexPartition(
            G,
            resolution_parameter=0.01,
        )
        opt = leidenalg.Optimiser()
        opt.optimise_partition(partition)
        communities = partition.membership
        print('n. communities: {:}'.format(len(np.unique(communities))))
        ds.samplesheet['community'] = communities

    print('Unsupervised clustering, rough')
    import igraph as ig
    sys.path.insert(0, os.path.abspath('../../packages/'))
    import leidenalg
    G = ig.Graph(edges=edges)
    partition = partition = leidenalg.CPMVertexPartition(
        G,
        resolution_parameter=0.002,
Exemple #14
0
    def compute_communities(self):
        '''Compute communities from a matrix with fixed nodes

        An undirected graph is built from self.neighbors. The first n_fixed
        nodes start in the community of their cell type and are flagged as
        fixed; all other nodes start as singletons.

        Returns:
            None, but SemiAnnotate.membership is set as an array of str with
            size N - n_fixed with the community/cluster membership of all
            columns except the first n_fixed ones: the cell type name when
            the column joined a fixed community, otherwise the community
            number as a string.

        Raises:
            ImportError: if leidenalg's optimise_partition has no
                'fixed_nodes' argument.
            ValueError: if self.clustering_metric is neither 'cpm' nor
                'modularity'.
        '''
        import inspect
        import igraph as ig
        import leidenalg

        # Check whether this version of Leiden has fixed nodes support
        opt = leidenalg.Optimiser()
        sig = inspect.getfullargspec(opt.optimise_partition)
        if 'fixed_nodes' not in sig.args:
            raise ImportError(
                'This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version'
            )

        matrix = self.matrix
        aa = self.cell_types
        # A plain list is required: numpy arrays have no .index() method,
        # and the same ordered list is reused to translate the result back.
        aau = list(np.unique(aa))
        n_fixed = self.n_fixed
        clustering_metric = self.clustering_metric
        resolution_parameter = self.resolution_parameter
        neighbors = self.neighbors

        _, N = matrix.shape

        # Construct graph from the lists of neighbors. Pairs are stored in
        # sorted order so that i-n and n-i collapse into one undirected edge.
        # A node listed as its own neighbor is skipped, since it does not
        # describe an edge between two nodes.
        edges_d = set()
        for i, neis in enumerate(neighbors):
            for nei in neis:
                if nei == i:
                    continue
                edges_d.add((i, nei) if i < nei else (nei, i))

        edges = list(edges_d)
        g = ig.Graph(n=N, edges=edges, directed=False)

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        aaun = len(aau)
        type_index = {ct: i for i, ct in enumerate(aau)}
        initial_membership = [
            type_index[aa[j]] if j < n_fixed else aaun + (j - n_fixed)
            for j in range(N)
        ]

        # Compute communities with semi-supervised Leiden
        # NOTE(review): upstream leidenalg does not document a
        # resolution_parameter argument for ModularityVertexPartition;
        # confirm the installed version accepts it.
        if clustering_metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                g,
                resolution_parameter=resolution_parameter,
                initial_membership=initial_membership,
            )
        elif clustering_metric == 'modularity':
            partition = leidenalg.ModularityVertexPartition(
                g,
                resolution_parameter=resolution_parameter,
                initial_membership=initial_membership,
            )
        else:
            raise ValueError('clustering_metric not understood: {:}'.format(
                clustering_metric))

        fixed_nodes = [int(i < n_fixed) for i in range(N)]
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
        membership = partition.membership[n_fixed:]

        # Convert the known cell types. Fixed communities were numbered by
        # position in aau, so aau (not the per-node cell_types list) is the
        # lookup table. This assumes the optimiser keeps the community
        # numbers of the fixed nodes.
        labels = [str(aau[x]) if x < aaun else str(x) for x in membership]

        # The string width must fit both names and numeric labels: a
        # fixed-width numpy string array silently truncates longer entries.
        width = max(
            [len(lb) for lb in labels] + [len(ct) for ct in aau] + [1])
        self.membership = np.array(labels, dtype='U{:}'.format(width))
Exemple #15
0
    def leiden(
        self,
        axis,
        edges,
        edge_weights=None,
        metric='cpm',
        resolution_parameter=0.001,
        initial_membership=None,
        fixed_nodes=None,
    ):
        '''Graph-based Leiden clustering

        Args:
            axis (string): It must be 'samples' or 'features'.
                The Dataset.counts matrix is used and
                either samples or features are clustered.
            edges (list of pairs): list of edges to make a graph used to
                cluster. Each member of a pair is an int referring to the
                index of the sample or feature in the sample/featuresheet.
            edge_weights (list of float or None): edge weights to use for
                clustering. If None, all edge weights are 1.
            metric (str): What metric to optimize. Can be 'modularity' or
                'cpm'.
            resolution_parameter (float): a number between 0 and 1 that sets
                how easy it is to call new clusters.
            initial_membership (str or None): name of a metadata column
                containing the initial membership vector for the clustering.
                If None (default), each sample starts as a singleton.
            fixed_nodes (str or None): name of a metadata column containing
                a boolean vector for which nodes are not allowed to change
                cluster membership during the Leiden algorithm. Your version
                of leidenalg must support fixed nodes for this feature to
                work.

        Returns:
            pd.Series with the labels of the clusters.

        Raises:
            ValueError: if axis is not 'samples' or 'features', or if metric
                is not 'cpm' or 'modularity'.
        '''
        import igraph as ig
        import leidenalg

        if axis == 'samples':
            n_nodes = self.dataset.n_samples
            index = self.dataset.samplenames
            sheet_name = 'samplesheet'
        elif axis == 'features':
            n_nodes = self.dataset.n_features
            index = self.dataset.featurenames
            sheet_name = 'featuresheet'
        else:
            # Fail early with a clear message instead of an unbound-variable
            # error further down.
            raise ValueError(
                "axis must be 'samples' or 'features', got: {:}".format(axis))

        g = ig.Graph(n=n_nodes, edges=edges, directed=False)

        partition_kwargs = {'resolution_parameter': resolution_parameter}
        if edge_weights is not None:
            g.es['weight'] = edge_weights
            # Setting the edge attribute alone is not enough: the partition
            # only uses weights when told which attribute holds them.
            partition_kwargs['weights'] = 'weight'

        if initial_membership is not None:
            sheet = getattr(self.dataset, sheet_name)
            im = sheet[initial_membership].values.astype(int)
        else:
            im = np.arange(n_nodes)
        partition_kwargs['initial_membership'] = list(im)

        # NOTE(review): upstream leidenalg does not document a
        # resolution_parameter argument for ModularityVertexPartition;
        # confirm the installed version accepts it.
        if metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(g, **partition_kwargs)
        elif metric == 'modularity':
            partition = leidenalg.ModularityVertexPartition(
                g, **partition_kwargs)
        else:
            raise ValueError(
                'clustering_metric not understood: {:}'.format(metric))

        opt = leidenalg.Optimiser()

        if fixed_nodes is not None:
            sheet = getattr(self.dataset, sheet_name)
            fxn = list(sheet[fixed_nodes].values.astype(int))
            opt.optimise_partition(partition, fixed_nodes=fxn)
        else:
            opt.optimise_partition(partition)

        communities = partition.membership

        labels = pd.Series(communities, index=index)

        return labels
Exemple #16
0
    def cluster_graph(self):
        '''Compute communities from a matrix with fixed nodes

        Each of the first n_fixed columns of self.matrix stands for
        self.sizes[column] graph nodes that share one fixed community; every
        later column contributes one free node starting as a singleton.

        Returns:
            None, but Averages.membership is set as an array of str with
            size Ne - n_fixede (one entry per non-fixed node): the atlas cell
            type when the node joined a fixed community, otherwise the
            community number as a string.

        Raises:
            ImportError: if leidenalg's optimise_partition has no
                'fixed_nodes' argument.
            ValueError: if the initial membership does not have one entry per
                node, or if self.clustering_metric is neither 'cpm' nor
                'modularity'.
        '''
        import inspect
        import leidenalg

        # Check whether this version of Leiden has fixed nodes support
        opt = leidenalg.Optimiser()
        sig = inspect.getfullargspec(opt.optimise_partition)
        if 'fixed_nodes' not in sig.args:
            raise ImportError('This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version')

        matrix = self.matrix
        sizes = self.sizes
        n_fixed = self.n_fixed
        clustering_metric = self.clustering_metric
        resolution_parameter = self.resolution_parameter
        g = self.graph

        _, N = matrix.shape
        # "Expanded" counts: number of graph nodes rather than matrix columns
        n_fixede = int(np.sum(sizes[:n_fixed]))
        Ne = int(np.sum(sizes))

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        initial_membership = []
        for isi in range(N):
            n_copies = int(sizes[isi]) if isi < n_fixed else 1
            initial_membership.extend([isi] * n_copies)

        if len(initial_membership) != Ne:
            raise ValueError('initial_membership list has wrong length!')

        # Compute communities with semi-supervised Leiden
        # NOTE(review): upstream leidenalg does not document a
        # resolution_parameter argument for ModularityVertexPartition;
        # confirm the installed version accepts it.
        if clustering_metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                    g,
                    resolution_parameter=resolution_parameter,
                    initial_membership=initial_membership,
                    )
        elif clustering_metric == 'modularity':
            partition = leidenalg.ModularityVertexPartition(
                    g,
                    resolution_parameter=resolution_parameter,
                    initial_membership=initial_membership,
                    )
        else:
            raise ValueError(
                'clustering_metric not understood: {:}'.format(clustering_metric))

        fixed_nodes = [int(i < n_fixede) for i in range(Ne)]
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
        membership = partition.membership[n_fixede:]

        # Convert the known cell types. This assumes the optimiser keeps the
        # community numbers of the fixed nodes, so community i still means
        # cell_types[i]. Each label is translated exactly once, so a numeric
        # label can never be renamed twice.
        cell_types = list(self.cell_types)
        n_types = len(cell_types)
        labels = [
            str(cell_types[x]) if x < n_types else str(x) for x in membership]

        # The string width must fit both names and numeric labels: a
        # fixed-width numpy string array silently truncates longer entries
        # (e.g. '12' -> '1'), which would mislabel new clusters.
        width = max(
            [len(lb) for lb in labels] + [len(ct) for ct in cell_types] + [1])
        self.membership = np.array(labels, dtype='U{:}'.format(width))