Example #1
def python_leiden(df,
                  partition_type,
                  initial_membership=None,
                  weights=None,
                  n_iterations=2,
                  seed=None,
                  resolution_parameter=1):

    # create networkx object
    Gx = nx.from_pandas_edgelist(df=df,
                                 source='from',
                                 target='to',
                                 edge_attr='weight')

    # get weight attribute
    myweights = nx.get_edge_attributes(Gx, 'weight')

    # convert to igraph
    G = ig.Graph.TupleList(Gx.edges(), directed=False)
    G.es['weight'] = list(myweights.values())

    if partition_type == 'RBConfigurationVertexPartition':
        partition = la.find_partition(
            G,
            partition_type=la.RBConfigurationVertexPartition,
            initial_membership=initial_membership,
            weights=weights,
            n_iterations=n_iterations,
            seed=seed,
            resolution_parameter=resolution_parameter)
    elif partition_type == 'ModularityVertexPartition':
        partition = la.find_partition(
            G,
            partition_type=la.ModularityVertexPartition,
            initial_membership=initial_membership,
            weights=weights,
            n_iterations=n_iterations,
            seed=seed)
    else:
        raise ValueError('partition_type must be RBConfigurationVertexPartition '
                         'or ModularityVertexPartition; no other configurations '
                         'have been tested')

    # create dataframe with results

    vname = partition.graph.vs['name']
    membership = partition.membership
    membership_plus1 = [x + 1 for x in membership]
    datadict = {'V': vname, 'mem': membership_plus1}
    leiden_dfr = pd.DataFrame(datadict)
    leiden_dfr = leiden_dfr.set_index('V')

    return leiden_dfr
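A minimal call sketch for python_leiden, assuming the aliases the function relies on (pd, nx, ig, la) are imported as below; the edge list and weights are made up for illustration:

import pandas as pd
import networkx as nx
import igraph as ig
import leidenalg as la

edges = pd.DataFrame({'from':   ['a', 'a', 'b', 'c', 'd'],
                      'to':     ['b', 'c', 'c', 'd', 'e'],
                      'weight': [1.0, 2.0, 1.0, 0.5, 1.5]})

# one membership id (1-based) per vertex name, indexed by 'V'
clusters = python_leiden(edges, 'ModularityVertexPartition', seed=42)
print(clusters)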
Example #2
    def leiden(self):
        # Use the Leiden algorithm to do the clustering
        map_del = self.seq_map.tocoo()
        vcount = map_del.shape[0]
        sources = map_del.row
        targets = map_del.col
        wei = map_del.data
        index = sources>targets
        sources = sources[index]
        targets = targets[index]
        wei = wei[index]
        edgelist = list(zip(sources, targets))
        g = ig.Graph(vcount, edgelist)
        
        # determine the best resolution parameter
        st = []
        res_option = np.arange(0,300,10)
        res_option[0] = 1
        for res in res_option:
            part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition, weights=wei, resolution_parameter=res, n_iterations=-1)
            part = list(part)
            label_true = []
            label_pred = []
            for i in range(len(part)):
                for j in self.tax[part[i]]:
                    if j != 'Unassign':
                        label_true.append(j)
                        label_pred.append(i)
            ARI_score = metrics.adjusted_rand_score(label_true, label_pred)
            NMI_score = metrics.normalized_mutual_info_score(label_true, label_pred)
            st.append((ARI_score+NMI_score)/2) 

        ind = st.index(max(st))
        res_optimal = res_option[ind]
        part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition, weights=wei, resolution_parameter=res_optimal, n_iterations=-1)
        part = list(part)

        # dict of communities
        numnode = 0
        rang = []
        for ci in range(len(part)):
            if np.sum(self.len[part[ci]]) >= self.binsize:
                rang.append(ci)
                numnode = numnode+len(part[ci])
                for id in part[ci]:
                    self.dist_cluster[self.name[id]] = 'group'+str(ci)

        logger.debug('The optimal resolution is {}'.format(res_optimal))
        logger.debug('There are {} contigs in {} bins'.format(numnode , len(rang)))
        del map_del, sources, targets, wei, index, edgelist, g, part, label_true, label_pred
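The method above picks the resolution by scoring each candidate partition against known taxonomy labels (ARI/NMI). A standalone sketch of the same sweep, scored by modularity instead since no ground-truth labels are available in a toy example:

import numpy as np
import igraph as ig
import leidenalg

g = ig.Graph.Famous('Zachary')
res_option = np.arange(0, 300, 10)
res_option[0] = 1
scores = []
for res in res_option:
    part = leidenalg.find_partition(g,
                                    leidenalg.RBConfigurationVertexPartition,
                                    resolution_parameter=float(res),
                                    n_iterations=-1)
    scores.append(g.modularity(part.membership))
res_optimal = res_option[int(np.argmax(scores))]
print('optimal resolution:', res_optimal)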
Example #3
def leiden_cluster(wadm, resolution=0.1):
    # convert the adjacency matrix to an igraph graph
    g = get_igraph_from_adjacency(wadm, directed=True)


    # set the relevant parameters
    partition_type = leidenalg.RBConfigurationVertexPartition

    partition_kwargs={}
    partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
    partition_kwargs['n_iterations'] = -1
    partition_kwargs['seed'] = 0
    partition_kwargs['resolution_parameter'] = resolution

    par = leidenalg.find_partition(g, partition_type, **partition_kwargs)

    # convert the partition into per-node labels
    # count the number of nodes
    n = 0
    for i in range(len(par)):
        n += len(par[i])
    # initialize the labels
    labels = [-1 for i in range(n)]
    for i in range(len(par)):
        for j in range(len(par[i])):
            labels[par[i][j]] = i
    return labels
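Note that leidenalg already exposes the per-vertex labels, so the two label-building loops above should give the same result as reading the membership directly from the same par object:

labels = list(par.membership)   # labels[v] is the community index of vertex v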
Example #4
def leiden_partition(
        vertices: List[int],
        edges: List[Tuple[int, int]]) -> Tuple[Dict[int, int], List[int]]:
    """
    :param vertices: The vertices to be divided into communities by the Leiden algorithm
    :type vertices: List[int]
    :param edges: The links according to which the vertices are divided into communities
    :type edges: List[Tuple[int, int]]
    :return: the partition of the vertices, isolated nodes
    :rtype: Tuple[Dict[int, int], List[int]]
    """
    graph = ig.Graph()

    graph.add_vertices(len(vertices))
    graph.vs['id'] = vertices
    graph.add_edges(edges)

    isolates = [(v.index, v['id']) for v in graph.vs.select(_degree=0)]
    graph.delete_vertices([v[0] for v in isolates])
    isolates = [v[1] for v in isolates]

    if len(graph.vs) == 0:
        return {}, isolates

    partition = leidenalg.find_partition(graph,
                                         leidenalg.ModularityVertexPartition)
    partition_map = {}

    for index, p in enumerate(partition.membership):
        partition_map[graph.vs[index]['id']] = p

    return partition_map, isolates
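A toy call sketch (the igraph/leidenalg imports of the surrounding module are assumed): six vertices, a triangle, a pair, and one isolated vertex, which should come back in the isolates list.

vertices = [0, 1, 2, 3, 4, 5]
edges = [(0, 1), (1, 2), (0, 2), (3, 4)]
partition_map, isolates = leiden_partition(vertices, edges)
print(partition_map)   # e.g. {0: 0, 1: 0, 2: 0, 3: 1, 4: 1}
print(isolates)        # [5]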
Example #5
    def leiden(self):
        """
        Cluster the SNN graph using the Leiden algorithm.

        https://github.com/vtraag/leidenalg

        From Louvain to Leiden: guaranteeing well-connected communities
        Traag V, Waltman L, van Eck NJ
        https://arxiv.org/abs/1810.08473
        """
        log_debug('Running leiden clustering...')
        res = self.params['leiden_res']
        seed = self.params['seed']
        # construct the graph object
        nn = set(self.snn_graph[self.snn_graph.columns[0]])
        g = ig.Graph()
        g.add_vertices(len(nn))
        g.vs['name'] = list(range(1, len(nn) + 1))
        ll = []
        for i in self.snn_graph.itertuples(index=False):
            ll.append(tuple(i))
        g.add_edges(ll)
        # NOTE: self.params is a dict of parameters, so this comparison with a
        # string is always False and RBERVertexPartition is used below.
        if self.params == 'ModularityVertexPartition':
            part = leidenalg.ModularityVertexPartition
        else:
            part = leidenalg.RBERVertexPartition
        cl = leidenalg.find_partition(g,
                                      part,
                                      n_iterations=10,
                                      resolution_parameter=res,
                                      seed=seed)
        self.leiden_cl = cl.membership
        self.leiden_prep()
        log_debug('Leiden has finished.')
Example #6
def detect_community(matrix, weight="weight", method="leidenalg"):
    # Detect communities in the graph; only the leidenalg branch below is active,
    # the alternative methods are kept commented out.
    g = nx.from_numpy_matrix(matrix)
    dict_communities = {}
    #if method=="percolation":
        #communities = list(k_clique_communities(g, 3))
    #elif method=="Clauset-Newman-Moore":
        #communities = list(greedy_modularity_communities(g))
    #elif method == "infomap":
        #communities = findCommunities(g)
    #elif method == "label_propagation":
        #communities = label_propagation_communities(g)
    if method == "leidenalg": # run use python2
        import igraph as ig  # run use python2
        import leidenalg
        conn_indices = np.where(matrix)
        # get the weights corresponding to these indices
        weights = matrix[conn_indices]
        # a sequence of (i, j) tuples, each corresponding to an edge from i -> j
        edges = zip(*conn_indices)
        g = ig.Graph(edges)
        communities = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
        #print(communities.membership)
        #print(len(communities.membership))
        #print("network adj shape",matrix.shape)
        for i in range(0,len(communities.membership)):
            dict_communities[i] = communities.membership[i]
        return dict_communities
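Usage sketch on a tiny symmetric adjacency matrix; it assumes numpy and networkx (a version that still provides from_numpy_matrix) are imported in the surrounding module as np / nx:

import numpy as np

adj = np.array([[0, 1, 1, 0],
                [1, 0, 1, 0],
                [1, 1, 0, 1],
                [0, 0, 1, 0]], dtype=float)
communities = detect_community(adj)   # {node_index: community_id, ...}
print(communities)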
Example #7
    def make_leiden_clusters(self,
                             resolution: float = 1.0,
                             random_seed=4466) -> None:
        """
        Leiden clustering

        :param n_clusters: Number of clusters
        :return: None
        """

        try:
            import leidenalg
        except ImportError:
            raise ImportError(
                "ERROR: 'leidenalg' package is not installed. Please find the installation instructions "
                "here: https://github.com/vtraag/leidenalg#installation.")
        import igraph  # python-igraph

        adj = nx.to_scipy_sparse_matrix(self.refG)
        sources, targets = adj.nonzero()
        g = igraph.Graph()
        g.add_vertices(adj.shape[0])
        g.add_edges(list(zip(sources, targets)))
        g.es['weight'] = adj[sources, targets].A1
        part = leidenalg.find_partition(
            g,
            leidenalg.RBConfigurationVertexPartition,
            resolution_parameter=resolution,
            seed=random_seed)
        clusts = np.array(part.membership) + 1
        for n, c in zip(self.refG.nodes, clusts):
            self.nodes[n]['cluster'] = str(c)
        return None
Example #8
def leidenalg_igraph(g, res, random_state=0):
    '''Leidenalg clustering on an igraph object.

    Parameters
    ----------
    g : igraph
        The igraph object of connectivities.
    res : float
        The resolution parameter for Leidenalg clustering.
    random_state : int, optional
        The random state.      

    Returns
    -------
    labels : np.array     
        \([N, ]\) The clustered labels.
    '''
    partition_kwargs = {}
    partition_type = leidenalg.RBConfigurationVertexPartition
    partition_kwargs["resolution_parameter"] = res
    partition_kwargs["seed"] = random_state
    part = leidenalg.find_partition(
                    g, partition_type,
                    **partition_kwargs,
                )
    labels = np.array(part.membership)
    return labels
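Call sketch on a small built-in graph (the numpy and leidenalg imports of the surrounding module are assumed):

import igraph as ig

g = ig.Graph.Famous('Zachary')
labels = leidenalg_igraph(g, res=1.0, random_state=0)
print(labels)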
Example #9
def ld_time(G):

    # initiate an igraph object
    g = ig.Graph()

    # add vertices
    g.add_vertices(G.nodes)

    # add edges
    g.add_edges(G.edges)

    # add weights
    if nx.is_weighted(G):
        g.es['weight'] = list(nx.get_edge_attributes(G, 'weight').values())

    # initiate a list to store execution time for each algo
    algo_time = []

    for i in tqdm(range(10)):

        # start
        start_time = time.time()

        # fit the model
        partition = leidenalg.find_partition(
            g, leidenalg.ModularityVertexPartition)

        algo_time.append(time.time() - start_time)

    return np.mean(algo_time)
Example #10
def run_leiden(g, **params):
    """
    runs the leiden partitioning algorithm on a given graph.
    """
    import leidenalg
    part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
    return part.membership
Example #11
def leiden(conn, resolution=0.05, random_state=0, n_iterations=-1):
    try:
        import leidenalg as la
    except ImportError:
        raise ImportError(
            'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
        )

    print('running Leiden clustering')

    partition_kwargs = {}
    # convert the adjacency matrix into an igraph graph
    g = get_igraph_from_adjacency(conn)

    # Parameter setting
    partition_type = la.RBConfigurationVertexPartition
    partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
    partition_kwargs['n_iterations'] = n_iterations
    partition_kwargs['seed'] = random_state
    partition_kwargs['resolution_parameter'] = resolution

    # Leiden algorithm
    # part = la.find_partition(g, la.CPMVertexPartition, **partition_kwargs)
    part = la.find_partition(g, partition_type, **partition_kwargs)
    # groups is a length-|V| array; the integer at each position (node) denotes the cluster it belongs to
    groups = np.array(part.membership)

    n_clusters = int(np.max(groups) + 1)

    print('finished')
    return groups, n_clusters
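Several of these examples (3, 11, 15 and 19) rely on a get_igraph_from_adjacency helper that is not shown. A minimal sketch of what such a helper typically does; the real implementations (e.g. scanpy's) may differ in details:

import scipy.sparse as sp
import igraph as ig

def get_igraph_from_adjacency_sketch(adjacency, directed=False):
    # build a weighted igraph graph from a (sparse) adjacency/connectivity matrix
    adjacency = sp.coo_matrix(adjacency)
    g = ig.Graph(n=adjacency.shape[0],
                 edges=list(zip(adjacency.row.tolist(), adjacency.col.tolist())),
                 directed=directed)
    g.es['weight'] = adjacency.data
    return g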
Example #12
def run_alg(G, alg, gamma=1.0, sample=1.0):
    '''
    Run a community detection algorithm with a resolution parameter. Right now only the RB configuration model is used in Louvain/Leiden.

    Parameters
    ----------
    G : igraph.Graph
    alg : str
        choose between 'louvain' and 'leiden'
    gamma : float
        resolution parameter
    sample : float
        if smaller than 1, randomly delete a fraction of edges each time
    Returns
    -------
    C: scipy.sparse.csr_matrix
        a matrix recording the membership of each cluster

    '''
    G1 = G.copy()
    if sample < 1:
        G1 = network_perturb(G, sample)
    if alg =='louvain':
        partition_type = louvain.RBConfigurationVertexPartition
        partition = louvain.find_partition(G1, partition_type, resolution_parameter=gamma)
    elif alg == 'leiden':
        partition_type = leidenalg.RBConfigurationVertexPartition
        partition = leidenalg.find_partition(G1, partition_type, resolution_parameter=gamma)
    # partition = sorted(partition, key=len, reverse=True)
    LOGGER.info('Resolution: {:.4f}; find {} clusters'.format(gamma, len(partition)))

    return partition_to_membership_matrix(partition)
Example #13
def surprise_communities(g,
                         initial_membership=None,
                         weights=None,
                         node_sizes=None):
    """

    Surprise_communities is a model where the quality function to optimize is:

    .. math:: Q = m D(q \\parallel \\langle q \\rangle)

    where :math:`m` is the number of edges,  :math:`q = \\frac{\\sum_c m_c}{m}`,  is the fraction of internal edges, :math:`\\langle q \\rangle = \\frac{\\sum_c \\binom{n_c}{2}}{\\binom{n}{2}}` is the expected fraction of internal edges, and finally

    :math:`D(x \\parallel y) = x \\ln \\frac{x}{y} + (1 - x) \\ln \\frac{1 - x}{1 - y}`  is the binary Kullback-Leibler divergence.

    For directed graphs we can multiply the binomials by 2, and this leaves :math:`\\langle q \\rangle` unchanged, so that we can simply use the same
    formulation.  For weighted graphs we can simply count the total internal weight instead of the total number of edges for :math:`q`, while :math:`\\langle q \\rangle` remains unchanged.

    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.surprise_communities(G)

    :References:

    Traag, V. A., Aldecoa, R., & Delvenne, J.-C. (2015).  `Detecting communities using asymptotical surprise. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.92.022816/>`_ Physical Review E, 92(2), 022816. 10.1103/PhysRevE.92.022816

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.SurpriseVertexPartition,
                                    initial_membership=initial_membership,
                                    weights=weights,
                                    node_sizes=node_sizes)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "Surprise",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "node_sizes": node_sizes
                          })
Example #14
def main(timestamp):
    nng = ig.read(f'/tmp/{timestamp}_leiden_graph.gml',
                  format='graphml')
    partition = la.find_partition(nng, la.ModularityVertexPartition)
    clusters = partition.membership
    clusters = np.array(clusters).astype(str)
    np.savetxt(f'/tmp/{timestamp}_leiden_clusters.csv',
               clusters, delimiter=',', newline='\n', fmt='%s')
Example #15
    def __call__(self, orig_affinity_mat, initclusters):

        #assert there are no nan values in data
        #assert that the min affinity is >= 0
        if scipy.sparse.issparse(orig_affinity_mat):
            assert np.sum(np.isnan(orig_affinity_mat.data))==0
            #assert that the min affinity is >= 0
            assert np.min(orig_affinity_mat.data) >= 0,\
                    np.min(orig_affinity_mat.data)
        else:
            assert np.sum(np.isnan(orig_affinity_mat))==0
            assert np.min(orig_affinity_mat) >= 0,\
                    np.min(orig_affinity_mat)

        if (self.verbose):
            print("Beginning preprocessing + Leiden")
            print_memory_use()
            sys.stdout.flush()
        all_start = time.time()
        if (self.affmat_transformer is not None):
            affinity_mat = self.affmat_transformer(orig_affinity_mat)
        else:
            affinity_mat = orig_affinity_mat

        the_graph = get_igraph_from_adjacency(adjacency=affinity_mat)
        best_clustering = None
        best_quality = None

        if (self.verbose):
            toiterover = tqdm(range(self.numseedstotry))
        else:
            toiterover = range(self.numseedstotry)

        #if an initclustering is specified, we would want to try the Leiden
        # both with and without that initialization and take the one that
        # gets the best modularity
        initclusters_to_try_list = [None]
        if (initclusters is not None):
            initclusters_to_try_list.append(initclusters)

        for seed in toiterover:
            for initclusters_to_try in initclusters_to_try_list:
                partition = leidenalg.find_partition(
                    the_graph, self.partitiontype,
                    weights=(np.array(the_graph.es['weight'])
                             .astype(np.float64)),
                    n_iterations=self.n_leiden_iterations,
                    initial_membership=initclusters_to_try,
                    seed=seed*100)
                quality = partition.quality()
                if ((best_quality is None) or (quality > best_quality)):
                    best_quality = quality
                    best_clustering = np.array(partition.membership)
                    if (self.verbose):
                        print("Quality:",best_quality)
                        sys.stdout.flush()
        return ClusterResults(cluster_indices=best_clustering,
                              quality=best_quality)
Example #16
def run_leiden(
    graph: sp.coo_matrix,
    directed: bool,
    partition_type: Optional[Type[MutableVertexPartition]],
    resolution_parameter: float,
    n_iterations: int,
    seed: Optional[int],
    use_weights: bool,
    kargs,
) -> Tuple[np.ndarray, float]:
    """
    Wrapper for leiden community detection

    Args:
        graph (sp.coo_matrix): Affinity matrix
        directed (bool): See below in 'cluster()'
        partition_type (Optional[Type[MutableVertexPartition]]): See below in 'cluster()'
        resolution_parameter (float): See below in 'cluster()'
        n_iterations (int): See below in 'cluster()'
        seed (Optional[int]): See below in 'cluster()'
        use_weights (bool): See below in 'cluster()'
        kargs: See below in 'cluster()'

    Returns:
        communities, Q (Tuple[np.ndarray, float]): See below in 'cluster()'
    """

    # convert the graph from scipy.sparse.coo_matrix to an igraph Graph object
    # get the edge list from the nonzero entries of the affinity matrix
    edgelist = np.vstack(graph.nonzero()).T.tolist()
    g = ig.Graph(max(graph.shape), edgelist, directed=directed)
    # set edge weights
    g.es["weights"] = graph.data

    if not partition_type:
        partition_type = leidenalg.RBConfigurationVertexPartition
    if resolution_parameter:
        kargs["resolution_parameter"] = resolution_parameter
    if use_weights:
        kargs["weights"] = np.array(g.es["weights"]).astype("float64")
    kargs["n_iterations"] = n_iterations
    kargs["seed"] = seed

    print("Running Leiden optimization", flush=True)
    tic_ = time.time()
    communities = leidenalg.find_partition(
        g,
        partition_type=partition_type,
        **kargs,
    )
    Q = communities.q
    print(
        "Leiden completed in {} seconds".format(time.time() - tic_),
        flush=True,
    )
    communities = np.asarray(communities.membership)

    return communities, Q
Example #17
def rb_pots(g, initial_membership=None, weights=None, resolution_parameter=1):
    """
    Rb_pots is a model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i k_j}{2m} \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`k_i` is the (weighted) degree of node :math:`i`, :math:`m` is the total number of edges (or total edge weight), :math:`\\sigma_i` denotes the community of node :math:`i` and :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if :math:`\\sigma_i = \\sigma_j` and `0` otherwise.
    For directed graphs a slightly different formulation is used, as proposed by Leicht and Newman :

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i^\\mathrm{out} k_j^\\mathrm{in}}{m} \\right)\\delta(\\sigma_i, \\sigma_j),

    where :math:`k_i^\\mathrm{out}` and :math:`k_i^\\mathrm{in}` refers to respectively the outdegree and indegree of node :math:`i` , and :math:`A_{ij}` refers to an edge from :math:`i` to :math:`j`.
    Note that this is the same as the Leiden algorithm when setting :math:`\\gamma=1` and normalising by :math:`2m`, or :math:`m` for directed graphs.


    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rb_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006).  `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_  Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110

    Leicht, E. A., & Newman, M. E. J. (2008).  `Community Structure in Directed Networks. <https://www.ncbi.nlm.nih.gov/pubmed/18517839/>`_  Physical Review Letters, 100(11), 118703. 10.1103/PhysRevLett.100.118703

    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.RBConfigurationVertexPartition,
                                    resolution_parameter=resolution_parameter,
                                    initial_membership=initial_membership,
                                    weights=weights)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "RB Pots",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "resolution_parameter": resolution_parameter
                          })
Example #18
def get_leiden_modularity(G):
    '''
    Accept a networkx graph, return the modularity of the best
    partition according to the Leiden algorithm.
    '''
    Gi = nx_to_ig(G)
    leid = leidenalg.find_partition(Gi, leidenalg.ModularityVertexPartition)
    m1 = leid.modularity
    return m1
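The nx_to_ig helper used above (and again in Example #22) is not shown; a hypothetical stand-in that preserves node names and edge weights could look like this:

import networkx as nx
import igraph as ig

def nx_to_ig_sketch(G):
    # convert a networkx graph to igraph, keeping node names and edge weights
    nodes = list(G.nodes())
    index = {n: i for i, n in enumerate(nodes)}
    g = ig.Graph(n=len(nodes),
                 edges=[(index[u], index[v]) for u, v in G.edges()],
                 directed=G.is_directed())
    g.vs['name'] = [str(n) for n in nodes]
    if nx.is_weighted(G):
        g.es['weight'] = [d.get('weight', 1.0) for _, _, d in G.edges(data=True)]
    return g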
Example #19
def leiden_with_silhouette_score(X,
                                 leiden_nneighbors,
                                 skip_silhouette=False,
                                 leiden_iterations=10):
    """

    Parameters
    ----------
    X :
        
    leiden_nneighbors :
        
    skip_silhouette :
         (Default value = False)
    leiden_iterations :
         (Default value = 10)

    Returns
    -------

    """
    from sklearn.neighbors import kneighbors_graph
    from panopticon.utilities import get_igraph_from_adjacency
    from panopticon.utilities import import_check
    from sklearn.metrics import silhouette_score
    from collections import namedtuple

    exit_code = import_check("leidenalg",
                             'conda install -c conda-forge leidenalg')
    if exit_code != 0:
        return
    import leidenalg
    A = kneighbors_graph(X,
                         leiden_nneighbors,
                         mode='connectivity',
                         include_self=True,
                         metric='cosine')
    ig = get_igraph_from_adjacency(A)
    part = leidenalg.find_partition(ig,
                                    leidenalg.RBConfigurationVertexPartition,
                                    n_iterations=leiden_iterations,
                                    seed=17)
    clustering = part.membership
    if skip_silhouette:
        score = None
    else:
        score = silhouette_score(
            X,
            clustering,
            metric='cosine',
        )

    leiden_silhouette_output = namedtuple("LeidenSilhouetteOutput",
                                          "score nneighbors clustering")

    return leiden_silhouette_output(score, leiden_nneighbors, clustering)
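A quick call sketch on random data (it assumes the panopticon helpers imported inside the function are installed); the data shape is arbitrary:

import numpy as np

X = np.random.RandomState(0).normal(size=(200, 10))
out = leiden_with_silhouette_score(X, leiden_nneighbors=15)
print(out.score, out.nneighbors, len(set(out.clustering)))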
Example #20
def rber_pots(g,
              initial_membership=None,
              weights=None,
              node_sizes=None,
              resolution_parameter=1):
    """
    rber_pots is a Leiden model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma p \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix,  :math:`p = \\frac{m}{\\binom{n}{2}}` is the overall density of the graph, :math:`\\sigma_i` denotes the community of node :math:`i`, :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if  :math:`\\sigma_i = \\sigma_j` and `0` otherwise, and, finally :math:`\\gamma` is a resolution parameter.


    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rber_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006).  `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_  Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110


    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(
        g,
        leidenalg.RBERVertexPartition,
        resolution_parameter=resolution_parameter,
        initial_membership=initial_membership,
        weights=weights,
        node_sizes=node_sizes,
    )
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "RBER Pots",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "node_sizes": node_sizes,
                              "resolution_parameter": resolution_parameter
                          })
Example #21
def get_leiden(mknn, min_cluster_size=10, resolution_parameter=1.0, seed=0, n_iterations=5):

    g = ig.Graph(n=mknn.shape[0], edges=list(zip(mknn.row, mknn.col)), directed=False)

    part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition,
                seed = seed, n_iterations = n_iterations,
                resolution_parameter=resolution_parameter,
                )

    return CellLabels(clean_labels(part.membership, min_cluster_size=min_cluster_size))   
Example #22
def leiden(gr, resolution=1, optimizer="RB", seed=12343):
    weights = gr.es["weight"]
    if (optimizer == "RB"):
        algo = la.RBConfigurationVertexPartition
    elif (optimizer == "CPM"):
        algo = la.CPMVertexPartition
    partition = la.find_partition(gr, algo,
        n_iterations=10, seed=seed, resolution_parameter=resolution,
        weights=weights)
    return partition
def communities_leiden(graph):
    graphi = nx_to_ig(graph)
    partition = la.find_partition(graphi,
                                  la.ModularityVertexPartition) 
    dictionary = get_id_to_title(graph)
    res = translate_leiden_to_dict(partition, graphi, dictionary)
    
    print('detected', len(res), 'communities')

    return res
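Call sketch for leiden(): a small weighted graph clustered with the RB optimizer (the la alias from the surrounding module is assumed):

import igraph as ig

gr = ig.Graph.Famous('Zachary')
gr.es['weight'] = [1.0] * gr.ecount()
partition = leiden(gr, resolution=1, optimizer="RB")
print(len(partition), 'communities')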
Example #24
def leiden_clustering(adjacency, res=1.0, directed=False, part=None):
    import leidenalg
    g, weights = generate_igraph(adjacency, directed=directed)
    if part is None:
        part = leidenalg.find_partition(
            g,
            leidenalg.RBConfigurationVertexPartition,
            resolution_parameter=res)
        part = part.membership
    modularity = g.modularity(part, weights=weights)
    return part, modularity
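The generate_igraph helper above is not shown; a hypothetical stand-in that returns the graph together with the edge-weight list used by the modularity call could be:

import numpy as np
import igraph as ig

def generate_igraph_sketch(adjacency, directed=False):
    # igraph graph plus edge-weight list from a dense adjacency matrix
    adjacency = np.asarray(adjacency)
    rows, cols = np.nonzero(adjacency)
    if not directed:
        keep = rows < cols          # keep each undirected edge once
        rows, cols = rows[keep], cols[keep]
    weights = adjacency[rows, cols].tolist()
    g = ig.Graph(n=adjacency.shape[0],
                 edges=list(zip(rows.tolist(), cols.tolist())),
                 directed=directed)
    g.es['weight'] = weights
    return g, weights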
Example #25
def cpm(g_original, initial_membership=None, weights=None, node_sizes=None, resolution_parameter=1):
    """
    CPM is a model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`\\sigma_i` denotes the community of node :math:`i`, :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if  :math:`\\sigma_i = \\sigma_j` and `0` otherwise, and, finally :math:`\\gamma` is a resolution parameter.

    The internal density of communities

    .. math:: p_c = \\frac{m_c}{\\binom{n_c}{2}} \\geq \\gamma

    is higher than :math:`\\gamma`, while the external density

    :math:`p_{cd} = \\frac{m_{cd}}{n_c n_d} \\leq \\gamma`    is lower than :math:`\\gamma`. In other words, choosing a particular
    :math:`\\gamma` corresponds to choosing to find communities of a particular
    density, and as such defines communities. Finally, the definition of a community is in a sense independent of the actual graph, which is not the case for any of the other methods.


    :param g_original: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.cpm(G)

    :References:

    Traag, V. A., Van Dooren, P., & Nesterov, Y. (2011).  `Narrow scope for resolution-limit-free community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.84.016114/>`_ Physical Review E, 84(1), 016114. 10.1103/PhysRevE.84.016114


    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install igraph to use the selected feature.")

    g = convert_graph_formats(g_original, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.CPMVertexPartition,
                                    resolution_parameter=resolution_parameter, initial_membership=initial_membership,
                                    weights=weights, node_sizes=node_sizes, )
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g_original, "CPM", method_parameters={"initial_membership": initial_membership,
                                                             "weights": weights, "node_sizes": node_sizes,
                                                             "resolution_parameter": resolution_parameter})
Example #26
def communityFromGraph(ig):
    """
    Loads graph from edge list pKL file and calculates communities
    """
    #dfile = make_graph_from_dict(gfile)
    #ig = pickle.load(open(dfile, 'rb'))#getIgraph(dfile)
    partition = la.find_partition(ig, la.ModularityVertexPartition)
    comm_dict = {}
    for p in range(len(partition)):
        comm_dict[p] = ig.vs.select(partition[p])['name']
    print("Found", len(comm_dict), 'communities for the primary interactome')
    return comm_dict
Example #27
def louvain_clusters(latent, k=10, rands=0):
    nn_matrix = kneighbors_graph(latent, k)
    rows, cols = np.where(nn_matrix.todense() == 1)
    edges = [(row, col) for row, col in zip(rows, cols)]
    g = ig.Graph()
    g.add_vertices(latent.shape[0])
    g.add_edges(edges)
    res = leidenalg.find_partition(g,
                                   leidenalg.ModularityVertexPartition,
                                   seed=rands)
    clusters = np.asarray(res.membership)
    return clusters
def community_detection(input, output):
    print("Loading graph...")
    graph = Graph.Read_Pickle(input)

    print("Partitioning...")
    part = leidenalg.find_partition(graph, leidenalg.ModularityVertexPartition)

    print("Saving partition...")
    with open(output, "w") as file:
        for partition in part:
            file.write(repr(partition))
            file.write("\n")
def communities_lei(mob):
    G = od_igraph(mob)
    partition = leidenalg.find_partition(G,
                                         leidenalg.ModularityVertexPartition,
                                         n_iterations=2,
                                         weights='weight')
    cluster = []
    for i, part in enumerate(partition):
        df = pd.DataFrame({'quadkey': G.vs()[part]['name']})
        df['cluster'] = i
        cluster.append(df)
    return (pd.concat(cluster))
Example #30
def identify_communities_leidenalg(net):
	giant = get_largest_component(net)
	comms = leidenalg.find_partition(giant, leidenalg.ModularityVertexPartition)
	comm_list = comms.subgraphs() # communities in current level
	print('Number of communities identified:',len(comm_list))
	net_copy = net.copy()
	net_copy.vs['community'] = "-1"
	for idx,comm in enumerate(comm_list):
		for v1 in comm.vs:
			v2 = net_copy.vs.find(name=v1['name'])
			v2['community'] = str(idx+1)
	return net_copy