Example #1
import numpy
import scipy.sparse as sps
import igraph as ig
import leidenalg
from sklearn import neighbors


def leiden_clustering(umap_res,
                      resolution_range=(0, 1),
                      random_state=2,
                      kdtree_dist='euclidean'):
    tree = neighbors.KDTree(umap_res, metric=kdtree_dist)
    vals, i, j = [], [], []
    for idx in range(umap_res.shape[0]):
        dist, ind = tree.query([umap_res[idx]], k=25)
        vals.extend(list(dist.squeeze()))
        j.extend(list(ind.squeeze()))
        i.extend([idx] * len(ind.squeeze()))
    print(len(vals))
    ginput = sps.csc_matrix(
        (numpy.array(vals), (numpy.array(i), numpy.array(j))),
        shape=(umap_res.shape[0], umap_res.shape[0]))
    sources, targets = ginput.nonzero()
    edgelist = zip(sources.tolist(), targets.tolist())
    G = ig.Graph(edges=list(edgelist))
    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(random_state)
    profile = optimiser.resolution_profile(G,
                                           leidenalg.CPMVertexPartition,
                                           resolution_range=resolution_range,
                                           number_iterations=0)
    print([len(elt) for elt in profile])
    return profile
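A minimal way to exercise the function above; the embedding here is random data standing in for a real UMAP result (umap_res just needs to be a 2-D NumPy array), so the output is purely illustrative:

import numpy

umap_res = numpy.random.RandomState(0).normal(size=(200, 2))
profile = leiden_clustering(umap_res, resolution_range=(0, 1))
# each profile entry is a partition found at some resolution
for part in profile:
    print(part.resolution_parameter, len(part))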
Example #2
    def fit(self):
        '''Compute communities from a matrix with fixed nodes

        Returns:
            None, but the membership attribute is set as an array of int with
            size N - n_fixed with the community/cluster membership of all
            columns except the first n fixed ones.
        '''
        self._parse_graph()

        aa = self.annotations
        n_fixed = len(aa)
        g = self.graph
        N = g.vcount()

        opt = leidenalg.Optimiser()
        fixed_nodes = [int(i < n_fixed) for i in range(N)]

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        aau = list(np.unique(aa))
        aaun = len(aau)
        initial_membership = []
        for j in range(N):
            if j < n_fixed:
                mb = aau.index(aa[j])
            else:
                mb = aaun + (j - n_fixed)
            initial_membership.append(mb)

        if self.metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                g,
                resolution_parameter=self.resolution_parameter,
                initial_membership=initial_membership,
            )
        elif self.metric == 'modularity':
            # NOTE: ModularityVertexPartition does not take a resolution parameter
            partition = leidenalg.ModularityVertexPartition(
                g,
                initial_membership=initial_membership,
            )
        else:
            raise ValueError('clustering_metric not understood: {:}'.format(
                self.metric))

        # Run modified Leiden here
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)

        # Extract result
        membership = partition.membership[n_fixed:]

        # Convert the known cell types
        lstring = len(max(aau, key=len))
        self.membership = np.array([str(x) for x in membership],
                                   dtype='U{:}'.format(lstring))
        for i, ct in enumerate(aau):
            self.membership[self.membership == str(i)] = ct
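Note: the fixed_nodes keyword used above comes from a development version of leidenalg; in released versions (0.8.0 and later) the same argument is called is_membership_fixed. A minimal sketch of the equivalent call against the released API (check which name your installed version accepts):

import igraph as ig
import leidenalg

g = ig.Graph.Famous('Zachary')
partition = leidenalg.CPMVertexPartition(
    g,
    resolution_parameter=0.05,
    initial_membership=list(range(g.vcount())))
opt = leidenalg.Optimiser()
# pin node 0 to its initial community; all other nodes may move
is_fixed = [i == 0 for i in range(g.vcount())]
opt.optimise_partition(partition, is_membership_fixed=is_fixed)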
Example #3
 def _partition_graph(self, resolution):
     # https://github.com/vtraag/4TU-CSS/
     part, part0, part1 = la.CPMVertexPartition.Bipartite(
         self.graph, resolution_parameter_01=resolution
     )
     opt = la.Optimiser()
     opt.optimise_partition_multiplex(
         [part, part0, part1], layer_weights=[1, -1, -1], n_iterations=100
     )
     return part
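The layer_weights=[1, -1, -1] trick follows the leidenalg multiplex documentation: the bipartite CPM objective is decomposed into three layers, and the two within-class layers enter with negative weight so that links inside a class are penalised rather than rewarded. A self-contained sketch on a toy bipartite graph (igraph's bipartite generators set the 'type' vertex attribute that CPMVertexPartition.Bipartite expects):

import igraph as ig
import leidenalg as la

g = ig.Graph.Full_Bipartite(5, 4)
part, part0, part1 = la.CPMVertexPartition.Bipartite(
    g, resolution_parameter_01=0.3)
opt = la.Optimiser()
opt.optimise_partition_multiplex(
    [part, part0, part1], layer_weights=[1, -1, -1])
print(part.membership)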
Example #4
def CPM_Bipartite(g_original, resolution_parameter_01,
                  resolution_parameter_0=0, resolution_parameter_1=0, degree_as_node_size=False, seed=0):
    """
    CPM_Bipartite is the extension of CPM to bipartite graphs

    :param g_original: a networkx/igraph object
    :param resolution_parameter_01: Resolution parameter for the links between the two classes.
    :param resolution_parameter_0: Resolution parameter for class 0.
    :param resolution_parameter_1: Resolution parameter for class 1.
    :param degree_as_node_size: If ``True`` use degree as node size instead of 1, to mimic modularity
    :param seed: the random seed to be used in CPM method to keep results/partitions replicable
    :return: BiNodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.algorithms.bipartite.generators.random_graph(100, 20, 0.5)
    >>> coms = algorithms.CPM_Bipartite(G, 1)

    :References:

    Barber, M. J. (2007). Modularity and community detection in bipartite networks. Physical Review E, 76(6), 066102. 10.1103/PhysRevE.76.066102

    .. note:: Reference implementation: https://leidenalg.readthedocs.io/en/stable/multiplex.html?highlight=bipartite#bipartite
    """
    if ig is None or leidenalg is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install igraph and leidenalg to use the "
                                  "selected feature.")

    g = convert_graph_formats(g_original, ig.Graph)

    try:
        g.vs['name']
    except KeyError:
        g.vs['name'] = [v.index for v in g.vs]

    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(seed)

    p_01, p_0, p_1 = leidenalg.CPMVertexPartition.Bipartite(g, resolution_parameter_01=resolution_parameter_01,
                                                            resolution_parameter_0=resolution_parameter_0,
                                                            resolution_parameter_1=resolution_parameter_1,
                                                            degree_as_node_size=degree_as_node_size)
    optimiser.optimise_partition_multiplex([p_01, p_0, p_1], layer_weights=[1, -1, -1])

    coms = defaultdict(list)
    for n in g.vs:
        coms[p_01.membership[n.index]].append(n.index)

    return BiNodeClustering(list(coms.values()), [], g_original, "CPM_Bipartite",
                            method_parameters={"resolution_parameter_01": resolution_parameter_01,
                                               "resolution_parameter_0": resolution_parameter_0,
                                               "resolution_parameter_1": resolution_parameter_1,
                                               "degree_as_node_size": degree_as_node_size, "seed": seed})
Example #5
    def community_consensus_iterative(self, C):
        ## function finding the consensus of a given set of partitions. refer to the paper:
        ## 'Robust detection of dynamic community structure in networks', Danielle S. Bassett,
        ## Mason A. Porter, Nicholas F. Wymbs, Scott T. Grafton, Jean M. Carlson et al.

        npart, m = C.shape
        C_rand3 = np.zeros(C.shape)  # permuted version of C
        X = np.zeros((m, m))  # nodal association matrix for C
        X_rand3 = np.zeros((m, m))  # random nodal association matrix for C_rand3

        # randomly permute rows of C
        for i in range(npart):
            C_rand3[i, :] = C[i, np.random.permutation(m)]
            for k in range(m):
                for p in range(m):
                    if int(C[i, k]) == int(C[i, p]):
                        # X[p, k] counts how often nodes p and k are assigned to the same community
                        X[p, k] += 1
                    if int(C_rand3[i, k]) == int(C_rand3[i, p]):
                        # X_rand3[p, k] counts how often they would share one by chance
                        X_rand3[p, k] += 1
        #thresholding
        #keep only associated assignments that occur more often than expected in the random data

        X_new3 = np.zeros((m, m))
        X_new3[X > (np.max(np.triu(X_rand3, 1))) /
               2] = X[X > (np.max(np.triu(X_rand3, 1))) / 2]

        ##turn thresholded nodal association matrix into igraph
        edge_list = []
        weight_list = []
        for k, e in enumerate(np.transpose(np.nonzero(X_new3))):
            i, j = e[0], e[1]
            pair = (i, j)
            edge_list.append(pair)
            weight_list.append(X_new3[i][j])

        G = ig.Graph()
        G.add_vertices(m)
        G.add_edges(edge_list)
        G.es['weight'] = weight_list
        G.vs['id'] = list(range(m))

        optimiser = la.Optimiser()
        partition = la.ModularityVertexPartition(G, weights='weight')
        diff = optimiser.optimise_partition(partition, n_iterations=-1)

        return partition
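A sketch of how the consensus routine above might be fed: C stacks one membership vector per run, here obtained from repeated Leiden runs with different seeds (the object holding community_consensus_iterative is assumed to exist as obj):

import numpy as np
import igraph as ig
import leidenalg as la

g = ig.Graph.Famous('Zachary')
runs = []
for seed in range(10):
    part = la.find_partition(g, la.ModularityVertexPartition, seed=seed)
    runs.append(part.membership)
C = np.array(runs)  # shape: (n_partitions, n_nodes)
# consensus_partition = obj.community_consensus_iterative(C)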
Example #6
 def test_optimiser_with_fixed_nodes(self):
     G = ig.Graph.Full(3)
     partition = leidenalg.CPMVertexPartition(G,
                                              resolution_parameter=0.01,
                                              initial_membership=[2, 1, 0])
     # Equivalent to setting initial membership
     #partition.set_membership([2, 1, 2])
     opt = leidenalg.Optimiser()
     fixed_nodes = [True, False, False]
     opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
     self.assertListEqual(
         partition.membership, [2, 2, 2],
         msg="Optimising the partition with fixed nodes failed to recover "
             "the initial fixed memberships")
Example #7
0
    def leiden(self, G, interslice, resolution):
        
        layers, interslice_layer, G_full = la.time_slices_to_layers(G, interslice_weight = interslice)
        
        partitions = [la.RBConfigurationVertexPartition(H, 
                                            weights = 'weight', 
                                            resolution_parameter = resolution) for H in layers]
        
        interslice_partition = la.RBConfigurationVertexPartition(interslice_layer, 
                                                                 weights = 'weight',
                                                                 resolution_parameter = 0)
                                                     
        optimiser = la.Optimiser()
        
        diff = optimiser.optimise_partition_multiplex(partitions + [interslice_partition])

        return partitions, interslice_partition
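For reference, la.time_slices_to_layers expects each slice to be an igraph graph whose vertices carry an 'id' attribute (so the same node can be matched across slices) and, given the weights='weight' arguments above, a 'weight' edge attribute. A toy setup for calling the method (illustrative random slices; the enclosing object is assumed as obj):

import igraph as ig

slices = []
for _ in range(3):
    g = ig.Graph.Erdos_Renyi(n=20, p=0.2)
    g.vs['id'] = list(range(g.vcount()))  # shared node ids across slices
    g.es['weight'] = 1.0
    slices.append(g)

# partitions, interslice_partition = obj.leiden(slices, interslice=0.5, resolution=1.0)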
Example #8
 def _partition_graph(self, resolution: float, seed: int) -> ig.VertexClustering:
     if self.graph.is_bipartite():
         part, part0, part1 = la.CPMVertexPartition.Bipartite(
             self.graph, resolution_parameter_01=resolution, weights="weight"
         )
         opt = la.Optimiser()
         opt.set_rng_seed(seed)
         opt.optimise_partition_multiplex(
             [part, part0, part1], layer_weights=[1, -1, -1], n_iterations=-1
         )
     else:
         part = la.find_partition(
             self.graph,
             la.ModularityVertexPartition,
             weights="weight",
             n_iterations=-1,
             seed=seed,
         )
     return part
Example #9
    def runCommunityWithMultiplex(self):
        """
        Primary method for now
        TODO: update with more features
        Currently takes all forests in hypha (defined by nodes on shared edge set)
        and finds communities
        """
        print('running community detection')
        optimizer = la.Optimiser()
        netlist = []
        all_nodes = set()
        for pat, vals in self.forests.items():
            all_nodes.update(
                [self.interactome.vs['name'][i] for i in vals['vert']])
        print("Have", len(all_nodes), 'total nodes')
        for_graph = self.interactome  # .copy()#.subgraph(all_nodes)
        # NOTE: not convinced the forests have the same node/edge indices
        for nx_g in self.forests.values():
            # NOTE: this is a pointer, make sure …
            tmp_g = for_graph.subgraph_edges(nx_g['edge'],
                                             delete_vertices=False)
            netlist.append(tmp_g)
            print('Added network of', len(tmp_g.vs), 'vertices and',
                  len(tmp_g.es), 'edges')
        [membership, improv] = la.find_partition_multiplex(
            netlist,
            # la.RBERVertexPartition,
            la.ModularityVertexPartition)
        comm_df = pd.DataFrame({'Node': for_graph.vs['name'],\
                                'Community': membership})
        comm_counts = comm_df.groupby("Community")['Node'].count()
        comm_dict = dict(comm_df.groupby('Community')['Node'].apply(list))
        red_list = [comm_dict[c] for c in comm_counts.index[comm_counts > 5]]
        red_dict = dict(zip(comm_counts.index[comm_counts > 5], red_list))
        red_graph = {}
        for comm, vals in red_dict.items():
            rgraph = self.interactome.subgraph(vals)
            red_graph[comm] = rgraph
            print("Community", comm, " graph has", len(vals),'proteins and',\
                  len(rgraph.components()), 'component')
        return red_dict, red_graph
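la.find_partition_multiplex, used above, expects all layers to share the same vertex set and returns a single membership vector plus the quality improvement. A stripped-down sketch with two random layers over the same 30 vertices:

import igraph as ig
import leidenalg as la

g1 = ig.Graph.Erdos_Renyi(n=30, p=0.15)
g2 = ig.Graph.Erdos_Renyi(n=30, p=0.15)  # same vertex count, different edges
membership, improvement = la.find_partition_multiplex(
    [g1, g2], la.ModularityVertexPartition)
print(len(set(membership)), 'communities')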
Example #10
def run_alg(Gs, alg, gamma=1.0, sample=1.0, layer_weights=None):
    '''
    Run a community detection algorithm with a resolution parameter. Currently only the RB (RBConfiguration) quality function is used for Louvain/Leiden.

    Parameters
    ----------
    Gs : a list of igraph.Graph
    alg : str
        choose between 'louvain' and 'leiden'
    gamma : float
        resolution parameter
    sample : float
        if smaller than 1, randomly delete a fraction of edges each time
    layer_weights: a list of float
        specifying layer weights in the multilayer setting
    Returns
    ------
    C: scipy.sparse.csr_matrix
        a matrix recording the membership of each cluster

    '''
    if len(Gs) == 1:
        G = Gs[0]
        G1 = G.copy()
        if sample < 1:
            G1 = network_perturb(G, sample)
        if alg == 'louvain':
            partition_type = louvain.RBConfigurationVertexPartition
            partition = louvain.find_partition(G1,
                                               partition_type,
                                               resolution_parameter=gamma)
        elif alg == 'leiden':
            partition_type = leidenalg.RBConfigurationVertexPartition
            partition = leidenalg.find_partition(G1,
                                                 partition_type,
                                                 resolution_parameter=gamma)
        else:
            raise ValueError("alg must be 'louvain' or 'leiden'")
        partitions = [partition]
    else:  # multiplex mode
        if layer_weights is None:
            layer_weights = [1.0 for _ in Gs]
        assert len(layer_weights) == len(Gs), \
            'layer weights inconsistent with the number of input networks'
        Gs1 = [G.copy() for G in Gs]
        if sample < 1:
            Gs1 = [network_perturb(G, sample) for G in Gs]
        if alg == 'louvain':
            partition_type = louvain.RBConfigurationVertexPartition
            optimiser = louvain.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            _ = optimiser.optimise_partition_multiplex(
                partitions, layer_weights=layer_weights)
        elif alg == 'leiden':
            partition_type = leidenalg.RBConfigurationVertexPartition
            # partition = leidenalg.find_partition_multiplex(Gs1, partition_type, resolution_parameter=gamma,
            #                                                layer_weights=layer_weights)
            optimiser = leidenalg.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            _ = optimiser.optimise_partition_multiplex(
                partitions, n_iterations=-1, layer_weights=layer_weights
            )  # -1 means iterate until no further improvement
            # print([len(p) for p in partitions]) # debug
        else:
            raise ValueError("alg must be 'louvain' or 'leiden'")

    # partition = sorted(partition, key=len, reverse=True)
    LOGGER.info('Resolution: {:.4f}; find {} clusters'.format(
        gamma, len(partitions[0])))

    return partition_to_membership_matrix(partitions[0])
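partition_to_membership_matrix is not shown in this snippet; a plausible minimal version (an assumption, not the project's actual helper) records membership as a sparse binary clusters-by-nodes matrix, matching the csr_matrix return type promised in the docstring:

import numpy as np
from scipy.sparse import csr_matrix

def partition_to_membership_matrix(partition):
    membership = np.asarray(partition.membership)
    n_nodes = len(membership)
    n_clusters = membership.max() + 1
    data = np.ones(n_nodes, dtype=int)
    # C[k, i] == 1 iff node i belongs to cluster k
    return csr_matrix((data, (membership, np.arange(n_nodes))),
                      shape=(n_clusters, n_nodes))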
Example #11
def spectral_leiden(
    data: MultimodalData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_leiden_labels",
) -> None:
    """Cluster the data using Spectral Leiden algorithm. [Li20]_

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. Keyword ``'X_' + rep`` must exist in ``data.obsm``. By default, use PCA coordinates.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. Keyword must exist in ``data.obsm``. By default, use Diffusion Map coordinates. If diffmap is not calculated, use PCA coordinates instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering. Default is set to be the same as scikit-learn Kmeans function.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use for the KMeans step. -1 refers to using all physical CPU cores.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_leiden_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Examples
    --------
    >>> pg.spectral_leiden(data)
    """

    try:
        import leidenalg
    except ImportError:
        import sys
        logger.error("Need leidenalg! Try 'pip install leidenalg'.")
        sys.exit(-1)

    if f"X_{rep_kmeans}" not in data.obsm.keys():
        logger.warning(
            f"{rep_kmeans} is not calculated, switch to pca instead.")
        rep_kmeans = "pca"
        if f"X_{rep_kmeans}" not in data.obsm.keys():
            raise ValueError(f"Please run {rep_kmeans} first!")
    if f"W_{rep}" not in data.obsp:
        raise ValueError(
            "Cannot find affinity matrix. Please run neighbors first!")

    labels = partition_cells_by_kmeans(
        data.obsm[f"X_{rep_kmeans}"],
        n_clusters,
        n_clusters2,
        n_init,
        n_jobs,
        random_state,
    )

    W = data.obsp[f"W_{rep}"]

    G = construct_graph(W)
    partition_type = leidenalg.RBConfigurationVertexPartition
    partition = partition_type(G,
                               resolution_parameter=resolution,
                               weights="weight",
                               initial_membership=labels)
    partition_agg = partition.aggregate_partition()

    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition_agg, -1)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels,
                                           categories=categories)
    data.register_attr(class_label, "cluster")

    n_clusters = data.obs[class_label].cat.categories.size
    logger.info(
        f"Spectral Leiden clustering is done. Get {n_clusters} clusters.")
Example #12
def spectral_leiden(
    data: AnnData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_leiden_labels",
) -> None:
    """Cluster the data using Spectral Leiden algorithm.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. Keyword ``'X_' + rep`` must exist in ``data.obsm``. By default, use PCA coordinates.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. Keyword must exist in ``data.obsm``. By default, use Diffusion Map coordinates. If diffmap is not calculated, use PCA coordinates instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering. Default is set to be the same as scikit-learn Kmeans function.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. If ``-1``, use all available threads.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_leiden_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Examples
    --------
    >>> pg.spectral_leiden(adata)
    """

    start = time.time()

    if "X_" + rep_kmeans not in data.obsm.keys():
        logger.warning("{} is not calculated, switch to pca instead.".format(rep_kmeans))
        rep_kmeans = "pca"
        if "X_" + rep_kmeans not in data.obsm.keys():
            raise ValueError("Please run {} first!".format(rep_kmeans))
    if "W_" + rep not in data.uns:
        raise ValueError("Cannot find affinity matrix. Please run neighbors first!")

    labels = partition_cells_by_kmeans(
        data, rep_kmeans, n_jobs, n_clusters, n_clusters2, n_init, random_state,
    )

    W = data.uns["W_" + rep]

    G = construct_graph(W)
    partition_type = leidenalg.RBConfigurationVertexPartition
    partition = partition_type(
        G, resolution_parameter=resolution, weights="weight", initial_membership=labels
    )
    partition_agg = partition.aggregate_partition()

    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition_agg, -1)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    logger.info(
        "Spectral Leiden clustering is done. Time spent = {:.2f}s.".format(
            end - start
        )
    )
Example #13
    def compute_communities(self):
        '''Compute communities from a matrix with fixed nodes

        Returns:
            None, but SemiAnnotate.membership is set as an array of int with
            size N - n_fixed with the community/cluster membership of all
            columns except the first n_fixed ones.
        '''
        import inspect
        import igraph as ig
        import leidenalg

        # Check whether this version of Leiden has fixed nodes support
        opt = leidenalg.Optimiser()
        sig = inspect.getfullargspec(opt.optimise_partition)
        if 'fixed_nodes' not in sig.args:
            raise ImportError(
                'This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version'
            )

        matrix = self.matrix
        aa = self.cell_types
        aau = list(np.unique(aa))
        n_fixed = self.n_fixed
        clustering_metric = self.clustering_metric
        resolution_parameter = self.resolution_parameter
        neighbors = self.neighbors

        L, N = matrix.shape

        # Construct graph from the lists of neighbors
        edges_d = set()
        for i, neis in enumerate(neighbors):
            for n in neis:
                edges_d.add(frozenset((i, n)))

        edges = [tuple(e) for e in edges_d]
        g = ig.Graph(n=N, edges=edges, directed=False)

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        aaun = len(aau)
        initial_membership = []
        for j in range(N):
            if j < self.n_fixed:
                mb = aau.index(aa[j])
            else:
                mb = aaun + (j - n_fixed)
            initial_membership.append(mb)

        # Compute communities with semi-supervised Leiden
        if clustering_metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                g,
                resolution_parameter=resolution_parameter,
                initial_membership=initial_membership,
            )
        elif clustering_metric == 'modularity':
            # NOTE: ModularityVertexPartition does not take a resolution parameter
            partition = leidenalg.ModularityVertexPartition(
                g,
                initial_membership=initial_membership,
            )
        else:
            raise ValueError('clustering_metric not understood: {:}'.format(
                clustering_metric))

        fixed_nodes = [int(i < n_fixed) for i in range(N)]
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
        membership = partition.membership[n_fixed:]

        # Convert the known cell types
        lstring = len(max(self.cell_types, key=len))
        self.membership = np.array([str(x) for x in membership],
                                   dtype='U{:}'.format(lstring))
        for i, ct in enumerate(self.cell_types):
            self.membership[self.membership == str(i)] = ct
Example #14
mob['month'] = [x.month for x in mob['date']]
months = list(np.unique([x.month for x in mob['date']]))
#%%
mob = mob.groupby(['month', 'journey', 'start_quadkey', 'end_quadkey']).sum()['n_crisis'].reset_index()
#%%
#mob = mob.loc[[x in dates for x in mob['month']], :]
mob = mob.groupby('month')    
mob = [mob.get_group(x) for x in mob.groups]
assert len(months) == len(mob)
#%%
mob
#%%
hierarchy = {}
for i, month in enumerate(months):
    
    optimiser = leidenalg.Optimiser()

    G = od_igraph(mob[i])

    rp = optimiser.resolution_profile(G, leidenalg.CPMVertexPartition, min_diff_resolution = 0.01, resolution_range=(0,1), weights='weight')
    
    hierarchy[month] = rp
    
#%%
assert len(months) == len(hierarchy)
hierarchy
#%%
#redo this - one at a time - then abstract 
#in functions not loops; list comprehension is faster?

def collapse_clusters(partition, G):
    ...  # stub: body not included in this snippet
Example #15
def community_detection(eID,
                        probe="both",
                        bin=0.02,
                        sensitivity=1,
                        visual=False,
                        feedbackType=None,
                        user_start="trial_start",
                        user_end="trial_end",
                        region_list=[],
                        difficulty=[-1, 1],
                        percentage=1,
                        data=None):
    """
    Function:
    Takes an experiment ID and performs community detection analysis


    Parameters:
    eID: experiment ID 
    probe: name of the probe wanted or both for both probes
    bin: the size of the bin
    sensitivity: the sensitivity parameter for the leiden algorithm
    visual: a boolean on whether visualization is wanted
    feedbackType: value for feedback wanted
    starts: the name of the type of start intervals
    ends: the name of the type of end intervals



    Return:
    partition: ig graph vertex partition object
    partition_dictionary: a dictionary with keys for each community and sets as values with the indices of the clusters that belong to that community
    region_dict: dictionary keyed by community number and value of a dictionary with the names of the brain regions of that community and their frequency
    locations: a list of the locations for each cluster
    
    
    Example:
    without a known path:
        community_detection(
            exp_ID,
            visual=True,
            probe="probe00",
            start="stimOn_times",
            end="response_times",
        )
    with a known path "\\directory\\":
        community_detection(
            exp_ID,
            visual=True,
            path="\\directory\\",
            probe="probe00",
            start="stimOn_times",
            end="response_times",
        )


    """
    if not bool(data):
        spikes, clusters, trials, locations = djl.loading(
            eID, probe, region_list)
    else:
        spikes, clusters, trials, locations = data

    starts, ends = section_trial(user_start, user_end, trials, feedbackType,
                                 difficulty, percentage)

    spikes_interval, clusters_interval = spp.interval_selection(
        spikes, clusters, starts, ends)

    spikes_matrix = bb.processing.bincount2D(
        spikes_interval,
        clusters_interval,
        xbin=bin,  # xlim=[0, nclusters]
    )[0]
    spikes_matrix_fixed = spp.addition_of_empty_neurons(
        spikes_matrix, clusters, clusters_interval)
    correlation_matrix_original = np.corrcoef(spikes_matrix_fixed)
    correlation_matrix = correlation_matrix_original[:, :]
    correlation_matrix[correlation_matrix < 0] = 0
    np.fill_diagonal(correlation_matrix, 0)
    neuron_graph = ig.Graph.Weighted_Adjacency(correlation_matrix.tolist(),
                                               mode="UNDIRECTED")
    neuron_graph.vs["label"] = [f"{i}" for i in range(np.max(clusters))]

    if sensitivity != 1:

        partition = la.RBConfigurationVertexPartition(
            neuron_graph, resolution_parameter=sensitivity)
        optimiser = la.Optimiser()
        optimiser.optimise_partition(partition)
    else:
        partition = la.find_partition(neuron_graph,
                                      la.ModularityVertexPartition)

    if visual:
        visualization(neuron_graph, partition)
    partition_dictionary = dictionary_from_communities(partition)
    region_dict = location_dictionary(partition_dictionary, locations)
    return partition, partition_dictionary, region_dict, locations
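In the sensitivity != 1 branch above, the partition and optimiser are built by hand; since la.find_partition forwards extra keyword arguments to the partition constructor, an equivalent one-liner (using the same names as in the function above) would be:

partition = la.find_partition(neuron_graph,
                              la.RBConfigurationVertexPartition,
                              resolution_parameter=sensitivity)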
Example #16
    def leiden(
        self,
        axis,
        edges,
        edge_weights=None,
        metric='cpm',
        resolution_parameter=0.001,
        initial_membership=None,
        fixed_nodes=None,
    ):
        '''Graph-based Leiden clustering

        Args:
            axis (string): It must be 'samples' or 'features'.
                The Dataset.counts matrix is used and
                either samples or features are clustered.
            edges (list of pairs): list of edges to make a graph used to
                cluster. Each member of a pair is an int referring to the
                index of the sample or feature in the sample/featuresheet.
            edge_weights (list of float or None): edge weights to use for
                clustering. If None, all edge weights are 1.
            metric (str): What metric to optimize. Can be 'modularity' or
                'cpm'.
            resolution_parameter (float): a number between 0 and 1 that sets
                how easy it is to call new clusters.
            initial_membership (str or None): name of a metadata column
                containing the initial membership vector for the clustering.
                If None (default), each sample starts as a singleton.
            fixed_nodes (str or None): name of a metadata column containing
                a boolean vector for which nodes are not allowed to change
                cluster membership during the Leiden algorithm. Your version
                of leidenalg must support fixed nodes for this feature to
                work.

        Returns:
            pd.Series with the labels of the clusters.
        '''
        import igraph as ig
        import leidenalg

        if axis == 'samples':
            n_nodes = self.dataset.n_samples
            index = self.dataset.samplenames
        elif axis == 'features':
            n_nodes = self.dataset.n_features
            index = self.dataset.featurenames
        else:
            raise ValueError("axis must be 'samples' or 'features'")

        g = ig.Graph(n=n_nodes, edges=edges, directed=False)
        if edge_weights is not None:
            g.es['weight'] = edge_weights

        if initial_membership is not None:
            if axis == 'samples':
                im = self.dataset.samplesheet[
                    initial_membership].values.astype(int)
            else:
                im = self.dataset.featuresheet[
                    initial_membership].values.astype(int)
        else:
            im = np.arange(n_nodes)
        im = list(im)

        if metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                g,
                resolution_parameter=resolution_parameter,
                initial_membership=im,
            )
        elif metric == 'modularity':
            # NOTE: ModularityVertexPartition does not take a resolution parameter
            partition = leidenalg.ModularityVertexPartition(
                g,
                initial_membership=im,
            )
        else:
            raise ValueError(
                'clustering_metric not understood: {:}'.format(metric))

        opt = leidenalg.Optimiser()

        if fixed_nodes is not None:
            if axis == 'samples':
                fxn = self.dataset.samplesheet[fixed_nodes].values.astype(int)
            else:
                fxn = self.dataset.featuresheet[fixed_nodes].values.astype(int)
            fxn = list(fxn)

            opt.optimise_partition(partition, fixed_nodes=fxn)
        else:
            opt.optimise_partition(partition)

        communities = partition.membership

        labels = pd.Series(communities, index=index)

        return labels
Example #17
    def cluster_graph(self):
        '''Compute communities from a matrix with fixed nodes

        Returns:
            None, but Averages.membership is set as an array with
            size N - n_fixed with the atlas cell types of all cells from the
            new dataset.
        '''
        import inspect
        import leidenalg

        # Check whether this version of Leiden has fixed nodes support
        opt = leidenalg.Optimiser()
        sig = inspect.getfullargspec(opt.optimise_partition)
        if 'fixed_nodes' not in sig.args:
            raise ImportError('This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version')

        matrix = self.matrix
        sizes = self.sizes
        n_fixed = self.n_fixed
        clustering_metric = self.clustering_metric
        resolution_parameter = self.resolution_parameter
        g = self.graph

        L, N = matrix.shape
        n_fixede = int(np.sum(sizes[:n_fixed]))
        Ne = int(np.sum(sizes))

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        initial_membership = []
        for isi in range(N):
            if isi < n_fixed:
                for ii in range(int(self.sizes[isi])):
                    initial_membership.append(isi)
            else:
                initial_membership.append(isi)

        if len(initial_membership) != Ne:
            raise ValueError('initial_membership list has wrong length!')

        # Compute communities with semi-supervised Leiden
        if clustering_metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                    g,
                    resolution_parameter=resolution_parameter,
                    initial_membership=initial_membership,
                    )
        elif clustering_metric == 'modularity':
            # NOTE: ModularityVertexPartition does not take a resolution parameter
            partition = leidenalg.ModularityVertexPartition(
                    g,
                    initial_membership=initial_membership,
                    )
        else:
            raise ValueError(
                'clustering_metric not understood: {:}'.format(clustering_metric))

        fixed_nodes = [int(i < n_fixede) for i in range(Ne)]
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
        membership = partition.membership[n_fixede:]

        # Convert the known cell types
        lstring = len(max(self.cell_types, key=len))
        self.membership = np.array(
                [str(x) for x in membership],
                dtype='U{:}'.format(lstring))
        for i, ct in enumerate(self.cell_types):
            self.membership[self.membership == str(i)] = ct
Example #18
 def setUp(self):
     self.optimiser = leidenalg.Optimiser()
Example #19
import time

import igraph as ig
import leidenalg as la


def resolution_profile(N,
                       sources,
                       targets,
                       partition_type,
                       resolution_range,
                       weights=None,
                       min_diff_bisect_value=1,
                       min_diff_resolution=0.001,
                       linear_bisection=False,
                       number_iterations=1,
                       rng_seed=None):

    # graph from adjacency matrix (passed in as sparse representation: N, sources, targets, weights)
    g = ig.Graph(directed=True)
    g.add_vertices(N)  # this adds adjacency.shape[0] vertices
    g.add_edges(list(zip(sources, targets)))

    # handle the possible partitions and their allowed args
    partition_kwargs = {}
    node_sizes = None  # only set for some partition types
    if partition_type is None:
        partition_type = la.RBConfigurationVertexPartition

    elif partition_type == "RBC":
        partition_type = la.RBConfigurationVertexPartition

    elif partition_type == "RBERVertexPartition":
        partition_type = la.RBERVertexPartition

    elif partition_type == "CPM":
        partition_type = la.CPMVertexPartition

    else:
        print(
            f"bad partition_type: {partition_type}. Using RBConfigurationVertexPartition"
        )
        partition_type = la.RBConfigurationVertexPartition

    if weights is not None:
        g.es['weight'] = weights

    if node_sizes is not None:
        partition_kwargs['node_sizes'] = node_sizes

    start_time = time.time()

    optimiser = la.Optimiser()

    if rng_seed is not None:
        optimiser.set_rng_seed(rng_seed)

    # resolution_profile(graph, partition_type, resolution_range, weights=None, bisect_func=<function Optimiser.<lambda>>,
    #                   min_diff_bisect_value=1, min_diff_resolution=0.001, linear_bisection=False, number_iterations=1, **kwargs)
    profile = optimiser.resolution_profile(
        g,
        partition_type,
        resolution_range,
        weights=weights,
        min_diff_bisect_value=min_diff_bisect_value,
        min_diff_resolution=min_diff_resolution,
        linear_bisection=linear_bisection,
        number_iterations=number_iterations,
        **partition_kwargs)

    print(f"leidenalg took {time.time()-start_time}s")
    # print(f"number of clusters = {len(np.unique(groups))}")

    return profile
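Calling the wrapper above on a small random edge list; the arrays stand in for the sparse (COO-style) representation the function expects, and the data is purely illustrative:

import numpy as np

rng = np.random.default_rng(0)
N = 50
sources = rng.integers(0, N, size=200)
targets = rng.integers(0, N, size=200)

profile = resolution_profile(N, sources, targets,
                             partition_type="CPM",
                             resolution_range=(0.01, 1),
                             rng_seed=42)
print([len(p) for p in profile])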
Example #20
    if os.path.isfile(fn_anno):
        print('Load clusters from file')
        ds.samplesheet['community'] = pd.read_csv(fn_anno,
                                                  sep='\t',
                                                  index_col=0)['community']
    else:
        print('Unsupervised clustering')
        import igraph as ig
        sys.path.insert(0, os.path.abspath('../../packages/'))
        import leidenalg
        G = ig.Graph(edges=edges)
        partition = leidenalg.CPMVertexPartition(
            G,
            resolution_parameter=0.01,
        )
        opt = leidenalg.Optimiser()
        opt.optimise_partition(partition)
        communities = partition.membership
        print('n. communities: {:}'.format(len(np.unique(communities))))
        ds.samplesheet['community'] = communities

    print('Unsupervised clustering, rough')
    import igraph as ig
    sys.path.insert(0, os.path.abspath('../../packages/'))
    import leidenalg
    G = ig.Graph(edges=edges)
    partition = leidenalg.CPMVertexPartition(
        G,
        resolution_parameter=0.002,
    )
    opt = leidenalg.Optimiser()
Example #21
    def fit_transform(self, graphs):
        G=[]
        for graph in graphs:
            if type(graph) is ig.Graph:
                G.append(graph)
            elif issparse(graph):
                G.append(self._scipy_to_igraph(graph))
            else:
                G.append(self._other_to_igraph(graph))
                
        if self.verbose:
            for i in range(len(G)):
                print("View Graph {}: num_nodes: {}, num_edges: {}, directed: {}, num_components: {}, num_isolates: {}"
                      .format(i, G[i].vcount(), G[i].ecount(), G[i].is_directed(), 
                              len(G[i].components(mode='WEAK').sizes()), G[i].components(mode='WEAK').sizes().count(1)))
        
        self.weights = []
        self.resolutions = []
        self.best_modularity = -np.inf
        self.best_clustering = None
        self.best_resolutions = None
        self.best_weights = None
        self.modularities = []
        self.clusterings = []
        self.final_iteration = 0
        self.best_iteration = 0

        weights = [1] * len(G)
        resolutions = [1] * len(G)
        
        for iterate in range(self.max_clusterings):
            partitions = []
            for i in range(len(G)):
                partitions.append(la.RBConfigurationVertexPartition(G[i], resolution_parameter=resolutions[i]))
                
            optimiser = la.Optimiser()
            diff = optimiser.optimise_partition_multiplex(partitions, layer_weights = weights, n_iterations=self.n_iterations)
            self.clusterings.append(np.array(partitions[0].membership))
            self.modularities.append([part.quality()/(part.graph.ecount() if part.graph.is_directed() else 2*part.graph.ecount()) 
                                      for part in partitions])
            self.weights.append(weights.copy())
            self.resolutions.append(resolutions.copy())
            self.final_iteration += 1

            if self.verbose:
                print("--------")
                print("Iteration: {} \n Modularities: {} \n Resolutions: {} \n Weights: {}"
                      .format(self.final_iteration, self.modularities[-1], resolutions, weights))
            
            # if np.sum(np.array(self.weights[-1]) * np.array(self.modularities[-1])) > self.best_modularity:
            self.best_clustering = self.clusterings[-1]
            self.best_modularity = np.sum(np.array(self.weights[-1]) * np.array(self.modularities[-1]))
            self.best_resolutions = self.resolutions[-1]
            self.best_weights = self.weights[-1]
            self.best_iteration = self.final_iteration
                
            theta_in, theta_out = self._calculate_edge_probabilities(G)
            for i in range(len(G)):
                resolutions[i] = (theta_in[i] - theta_out[i]) / \
                    (np.log(theta_in[i]) - np.log(theta_out[i]))
                weights[i] = (np.log(theta_in[i]) - np.log(theta_out[i])) / \
                    np.mean([np.log(theta_in[j]) - np.log(theta_out[j])
                             for j in range(len(G))])

                
            if (np.all(np.abs(np.array(self.resolutions[-1])-np.array(resolutions)) <= self.resolution_tol)
                and np.all(np.abs(np.array(self.weights[-1])-np.array(weights)) <= self.resolution_tol)):
                break
        else:
            best_iteration = np.argmax([np.sum(np.array(self.weights[i]) * np.array(self.modularities[i]))
                                        for i in range(len(self.modularities))])
            self.best_clustering = self.clusterings[best_iteration]
            self.best_modularity = np.sum(np.array(self.weights[best_iteration]) * np.array(self.modularities[best_iteration]))
            self.best_resolutions = self.resolutions[best_iteration]
            self.best_weights = self.weights[best_iteration]
            self.best_iteration = best_iteration
            
            if self.verbose:
                print("MVMC did not converge, best result found: Iteration: {}, Modularity: {}, Resolutions: {}, Weights: {}"
                      .format(self.best_iteration, self.best_modularity, self.best_resolutions, self.best_weights))


        return self.best_clustering
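Stripped of the iterative weight/resolution updates, the core of the loop above is a single weighted multiplex optimisation; a minimal sketch assuming two view graphs over the same vertex set:

import igraph as ig
import leidenalg as la

views = [ig.Graph.Erdos_Renyi(n=40, p=0.1) for _ in range(2)]
partitions = [la.RBConfigurationVertexPartition(g, resolution_parameter=1.0)
              for g in views]
optimiser = la.Optimiser()
optimiser.optimise_partition_multiplex(partitions, layer_weights=[1.0, 1.0])
membership = partitions[0].membership  # membership is shared across layers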