Exemple #1
0
    def test_convert(self):
        g = nx.karate_club_graph()
        ign = utils.convert_graph_formats(g, ig.Graph)
        self.assertEqual(g.number_of_nodes(), ign.vcount())
        self.assertEqual(g.number_of_edges(), ign.ecount())

        g2 = utils.convert_graph_formats(ign, nx.Graph)
        self.assertEqual(g.number_of_nodes(), g2.number_of_nodes())
        self.assertEqual(g.number_of_edges(), g2.number_of_edges())

        g3 = utils.convert_graph_formats(g, nx.Graph)
        self.assertEqual(isinstance(g, nx.Graph), isinstance(g3, nx.Graph))

        g3 = utils.convert_graph_formats(ign, nx.Graph, directed=True)
        self.assertIsInstance(g3, nx.DiGraph)
Exemple #2
0
def hierarchical_link_community(g):
    """
    HLC (hierarchical link clustering) is a method to classify links into topologically related groups.
    The algorithm uses a similarity between links to build a dendrogram where each leaf is a link from the original network and branches represent link communities.
    At each level of the link dendrogram is calculated the partition density function, based on link density inside communities, to pick the best level to cut.

    :param g: a networkx/igraph object
    :return: EdgeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.hierarchical_link_community(G)

    :References:

    Ahn, Yong-Yeol, James P. Bagrow, and Sune Lehmann. `Link communities reveal multiscale complexity in networks. <https://www.nature.com/articles/nature09182/>`_ nature 466.7307 (2010): 761.
    """

    g = convert_graph_formats(g, nx.Graph)

    adj, edges = HLC_read_edge_list_unweighted(g)

    edge2cid, _, _, _ = HLC(adj, edges).single_linkage()

    coms = defaultdict(list)
    for e, com in edge2cid.items():
        coms[com].append(e)

    coms = [list(c) for c in coms.values()]
    return EdgeClustering(coms, g, "HLC", method_parameters={})
Exemple #3
0
def girvan_newman(g_original, level):
    """
    The Girvan–Newman algorithm detects communities by progressively removing edges from the original graph.
    The algorithm removes the "most valuable" edge, traditionally the edge with the highest betweenness centrality, at each step. As the graph breaks down into pieces, the tightly knit community structure is exposed and the result can be depicted as a dendrogram.

    :param g_original: a networkx/igraph object
    :param level: the level where to cut the dendrogram
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.girvan_newman(G, level=3)

    :References:

    Girvan, Michelle, and Mark EJ Newman. `Community structure in social and biological networks. <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC122977/>`_ Proceedings of the national academy of sciences 99.12 (2002): 7821-7826.
    """

    g = convert_graph_formats(g_original, nx.Graph)

    gn_hierarchy = nx.algorithms.community.girvan_newman(g)
    coms = []
    for _ in range(level):
        coms = next(gn_hierarchy)

    communities = []

    for c in coms:
        communities.append(list(c))

    return NodeClustering(communities, g_original, "Girvan Newman", method_parameters={"level": level})
Exemple #4
0
def lpam(g_original, k=2, threshold=0.5, distance="amp", seed=0):
    """
    Link Partitioning Around Medoids

    :param g_original: a networkx/igraph object
    :param k: number of clusters
    :param threshold: merging threshold in [0,1], default 0.5
    :param distance: type of distance: "amp" - amplified commute distance, or "cm" - commute distance, or distance matrix between all edges as np ndarray
    :param seed: random seed for k-medoid heuristic
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.lpam(G, k=2, threshold=0.4, distance = "amp")

    :References:

    Alexander Ponomarenko, Leonidas Pitsoulis, Marat Shamshetdinov. "Link Partitioning Around Medoids". https://arxiv.org/abs/1907.08731

    """
    g = convert_graph_formats(g_original, nx.Graph)
    return LPAM(graph=g, k=k, threshold=threshold, distance=distance, seed=seed)
Exemple #5
0
def lpanni(g_original, threshold=0.1):
    """

    LPANNI (Label Propagation Algorithm with Neighbor Node Influence) detects overlapping community structures by adopting fixed label propagation sequence based on the ascending order of node importance and label update strategy based on neighbor node influence and historical label preferred strategy.

    :param g_original: a networkx/igraph object
    :param threshold: Default 0.0001

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.lpanni(G)

    :References:

    Lu, Meilian, et al. "LPANNI: Overlapping community detection using label propagation in large-scale complex networks." IEEE Transactions on Knowledge and Data Engineering 31.9 (2018): 1736-1749.

    .. note:: Reference implementation: https://github.com/wxwmd/LPANNI
    """
    g = convert_graph_formats(g_original, nx.Graph)
    LPANNI(g)
    gen = GraphGenerator(threshold, g)
    communities = [list(c) for c in gen.get_Overlapping_communities()]

    return NodeClustering(communities, g_original, "LPANNI", method_parameters={"threshold": threshold},
                          overlap=True)
Exemple #6
0
def core_expansion(g_original, tolerance=0.0001):
    """
     Core Expansion automatically detect the core of each possible community in the network.
     Then, it iteratively expand each core by adding the nodes to form the fnal communities.
     The expansion process is based on the neighborhood overlap measure.

    :param g_original: a networkx/igraph object
    :param tolerance: numerical tollerance, default 0.0001
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.core_expansion(G)

    :References:

    Choumane, Ali, Ali Awada, and Ali Harkous. "Core expansion: a new community detection algorithm based on neighborhood overlap." Social Network Analysis and Mining 10 (2020): 1-11.

    .. note:: Reference implementation: https://github.com/pkalkunte18/CoreExpansionAlgorithm
    """
    g = convert_graph_formats(g_original, nx.Graph)
    communities = core_exp_find(g, tolerance)

    return NodeClustering(communities, g_original, "Core Expansion", method_parameters={"tolerance": tolerance},
                          overlap=True)
Exemple #7
0
def aslpaw(g_original):
    """
    ASLPAw can be used for disjoint and overlapping community detection and works on weighted/unweighted and directed/undirected networks.
    ASLPAw is adaptive with virtually no configuration parameters.

    :param g_original: a networkx/igraph object
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.aslpaw(G)

    :References:

    Xie J, Szymanski B K, Liu X. Slpa: Uncovering Overlapping Communities in Social Networks via a Speaker-Listener Interaction Dynamic Process[C]. IEEE 11th International Conference on Data Mining Workshops (ICDMW). Ancouver, BC: IEEE, 2011: 344–349.

    .. note:: Reference implementation: https://github.com/fsssosei/ASLPAw
    """
    g = convert_graph_formats(g_original, nx.Graph)
    coms = ASLPAw(g).adj

    communities = defaultdict(list)
    for i, c in coms.items():
        if len(c) > 0:
            for cid in c:
                communities[cid].append(i)

    return NodeClustering(list(communities.values()), g_original, "ASLPAw", method_parameters={}, overlap=True)
Exemple #8
0
def ego_networks(g_original, level=1):
    """
    Ego-networks returns overlapping communities centered at each nodes within a given radius.

    :param g_original: a networkx/igraph object
    :param level: extrac communities with all neighbors of distance<=level from a node. Deafault 1
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.ego_networks(G)
    """

    g = convert_graph_formats(g_original, nx.Graph)

    coms = []
    for n in g.nodes():
        coms.append(list(nx.ego_graph(g, n, radius=level).nodes()))
    return NodeClustering(coms,
                          g_original,
                          "Ego Network",
                          method_parameters={"level": 1},
                          overlap=True)
Exemple #9
0
def lais2(g):
    """
    LAIS2 is an overlapping community discovery algorithm based on the density function.
    In the algorithm considers the density of a group is defined as the average density of the communication exchanges between the actors of the group.
    LAIS2 IS composed of two procedures LA (Link Aggregate Algorithm) and IS2 (Iterative Scan Algorithm).

    :param g: a networkx/igraph object
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.lais2(G)

    :References:

    Baumes, Jeffrey, Mark Goldberg, and Malik Magdon-Ismail. `Efficient identification of overlapping communities. <https://link.springer.com/chapter/10.1007/11427995_3/>`_ International Conference on Intelligence and Security Informatics. Springer, Berlin, Heidelberg, 2005.

    .. note:: Reference implementation: https://github.com/kritishrivastava/CommunityDetection-Project2GDM

    """

    g = convert_graph_formats(g, nx.Graph)

    coms = LAIS2(g)
    return NodeClustering(coms, g, "LAIS2", method_parameters={"":""}, overlap=True)
Exemple #10
0
def newman_girvan_modularity(graph: nx.Graph, communities: object,
                             **kwargs: dict) -> object:
    """Difference the fraction of intra community edges of a partition with the expected number of such edges if distributed according to a null model.

    In the standard version of modularity, the null model preserves the expected degree sequence of the graph under consideration. In other words, the modularity compares the real network structure with a corresponding one where nodes are connected without any preference about their neighbors.

    .. math:: Q(S) = \\frac{1}{m}\\sum_{c \\in S}(m_S - \\frac{(2 m_S + l_S)^2}{4m})

    where :math:`m` is the number of graph edges, :math:`m_S` is the number of community edges, :math:`l_S` is the number of edges from nodes in S to nodes outside S.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :return: FitnessResult object


    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = evaluation.newman_girvan_modularity(g,communities)

    :References:

    1. Newman, M.E.J. & Girvan, M. `Finding and evaluating community structure in networks. <https://www.ncbi.nlm.nih.gov/pubmed/14995526/>`_ Physical Review E 69, 26113(2004).
    """

    graph = convert_graph_formats(graph, nx.Graph)
    coms = {}
    for cid, com in enumerate(communities.communities):
        for node in com:
            coms[node] = cid

    inc = dict([])
    deg = dict([])
    links = graph.size(weight='weight')
    if links == 0:
        raise ValueError("A graph without link has an undefined modularity")

    for node in graph:
        try:
            com = coms[node]
            deg[com] = deg.get(com, 0.) + graph.degree(node, weight='weight')
            for neighbor, dt in graph[node].items():
                weight = dt.get("weight", 1)
                if coms[neighbor] == com:
                    if neighbor == node:
                        inc[com] = inc.get(com, 0.) + float(weight)
                    else:
                        inc[com] = inc.get(com, 0.) + float(weight) / 2.
        except:
            pass

    res = 0.
    for com in set(coms.values()):
        res += (inc.get(com, 0.) / links) - (deg.get(com, 0.) /
                                             (2. * links))**2

    return FitnessResult(score=res)
Exemple #11
0
def spinglass(g):
    """
    Spinglass relies on an analogy between a very popular statistical mechanic model called Potts spin glass, and the community structure.
    It applies the simulated annealing optimization technique on this model to optimize the modularity.

    :param g: a networkx/igraph object
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.spinglass(G)

    :References:

    Reichardt, Jörg, and Stefan Bornholdt. `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_ Physical Review E 74.1 (2006): 016110.
    """
    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)
    coms = g.community_spinglass()
    communities = []

    for c in coms:
        communities.append([g.vs[x]['name'] for x in c])

    return NodeClustering(communities,
                          g,
                          "Spinglass",
                          method_parameters={"": ""})
Exemple #12
0
def __quality_indexes(graph, communities, scoring_function, summary=True):
    """

    :param graph: NetworkX/igraph graph
    :param communities: NodeClustering object
    :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-communitys ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    """

    graph = convert_graph_formats(graph, nx.Graph)
    values = []
    for com in communities.communities:
        community = nx.subgraph(graph, com)
        if scoring_function in [
                pq.PartitionQuality.average_internal_degree,
                pq.PartitionQuality.internal_edge_density,
                pq.PartitionQuality.triangle_participation_ratio,
                pq.PartitionQuality.edges_inside,
                pq.PartitionQuality.fraction_over_median_degree
        ]:
            values.append(scoring_function(community))
        else:
            values.append(scoring_function(graph, community))

    if summary:
        return FitnessResult(min=min(values),
                             max=max(values),
                             score=np.mean(values),
                             std=np.std(values))
    return values
Exemple #13
0
def scan(g, epsilon, mu):
    """
    SCAN (Structural Clustering Algorithm for Networks) is an algorithm which detects clusters, hubs and outliers in networks.
    It clusters vertices based on a structural similarity measure.
    The method uses the neighborhood of the vertices as clustering criteria instead of only their direct connections.
    Vertices are grouped into the clusters by how they share neighbors.

    :param g: a networkx/igraph object
    :param epsilon: the minimum threshold to assigning cluster membership
    :param mu: minimum number of neineighbors with a structural similarity that exceeds the threshold epsilon
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.scan(G, epsilon=0.7, mu=3)

    :References:

    Xu, X., Yuruk, N., Feng, Z., & Schweiger, T. A. (2007, August). `Scan: a structural clustering algorithm for networks. <http://www1.se.cuhk.edu.hk/~hcheng/seg5010/slides/p824-xu.pdf/>`_ In Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 824-833)
    """

    g = convert_graph_formats(g, nx.Graph)

    algorithm = SCAN_nx(g, epsilon, mu)
    coms = algorithm.execute()
    return NodeClustering(coms,
                          g,
                          "SCAN",
                          method_parameters={
                              "epsilon": epsilon,
                              "mu": mu
                          })
Exemple #14
0
def plot_network_clusters(graph,
                          partition,
                          position,
                          figsize=(8, 8),
                          node_size=200,
                          plot_overlaps=False,
                          plot_labels=False):
    """
    Plot a graph with node color coding for communities.

    :param graph: NetworkX/igraph graph
    :param partition: NodeClustering object
    :param position: A dictionary with nodes as keys and positions as values. Example: networkx.fruchterman_reingold_layout(G)
    :param figsize: the figure size; it is a pair of float, default (8, 8)
    :param node_size: int, default 200
    :param plot_overlaps: bool, default False. Flag to control if multiple algorithms memberships are plotted.
    :param plot_labels: bool, default False. Flag to control if node labels are plotted.

    Example:

    >>> from cdlib import algorithms, viz
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> pos = nx.spring_layout(g)
    >>> viz.plot_network_clusters(g, coms, pos)
    """
    partition = partition.communities
    graph = convert_graph_formats(graph, nx.Graph)

    n_communities = min(len(partition), len(COLOR))
    plt.figure(figsize=figsize)
    plt.axis('off')

    fig = nx.draw_networkx_nodes(graph,
                                 position,
                                 node_size=node_size,
                                 node_color='w')
    fig.set_edgecolor('k')
    nx.draw_networkx_edges(graph, position, alpha=.5)
    for i in range(n_communities):
        if len(partition[i]) > 0:
            if plot_overlaps:
                size = (n_communities - i) * node_size
            else:
                size = node_size
            fig = nx.draw_networkx_nodes(graph,
                                         position,
                                         node_size=size,
                                         nodelist=partition[i],
                                         node_color=COLOR[i])
            fig.set_edgecolor('k')
    if plot_labels:
        nx.draw_networkx_labels(
            graph,
            position,
            labels={node: str(node)
                    for node in graph.nodes()})

    return fig
Exemple #15
0
def slpa(g, t=21, r=0.1):
    """
    SLPA is an overlapping community discovery that extends tha LPA.
    SLPA consists of the following three stages:
    1) the initialization
    2) the evolution
    3) the post-processing


    :param g: a networkx/igraph object
    :param t: maximum number of iterations, default 20
    :param r: threshold  ∈ [0, 1]. It is used in the post-processing stage: if the probability of seeing a particular label during the whole process is less than r, this label is deleted from a node’s memory. Default 0.1
    :return: EdgeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.slpa(G,  t=21, r=0.1)



    :References:

    Xie Jierui, Boleslaw K. Szymanski, and Xiaoming Liu. `Slpa: Uncovering overlapping communities in social networks via a speaker-listener interaction dynamic process. <https://ieeexplore.ieee.org/document/6137400/>`_ Data Mining Workshops (ICDMW), 2011 IEEE 11th International Conference on. IEEE, 2011.

    .. note:: Reference implementation: https://github.com/kbalasu/SLPA
    """

    g = convert_graph_formats(g, nx.Graph)

    coms = slpa_nx(g, T=t, r=r)
    return NodeClustering(coms, g, "SLPA", method_parameters={"T": t, "r": r}, overlap=True)
Exemple #16
0
def lfm(g, alpha):
    """LFM is based on the local optimization of a fitness function.
    It finds both overlapping communities and the hierarchical structure.

    :param g: a networkx/igraph object
    :param alpha: parameter to controll the size of the communities:  Large values of alpha yield very small communities, small values instead deliver large modules. If alpha is small enough, all nodes end up in the same cluster, the network itself. In most cases, for alpha < 0.5 there is only one community, for alpha > 2 one recovers the smallest communities. A natural choise is alpha =1.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.lfm(G, alpha=0.8)

    :References:

    Lancichinetti, Andrea, Santo Fortunato, and János Kertész. `Detecting the overlapping and hierarchical community structure in complex networks <https://arxiv.org/abs/0802.1218/>`_ New Journal of Physics 11.3 (2009): 033015.
    """

    g = convert_graph_formats(g, nx.Graph)

    algorithm = LFM_nx(g, alpha)
    coms = algorithm.execute()

    return NodeClustering(coms, g, "LFM", method_parameters={"alpha": alpha}, overlap=True)
Exemple #17
0
def kclique(g, k):
    """
    Find k-clique communities in graph using the percolation method.
    A k-clique community is the union of all cliques of size k that can be reached through adjacent (sharing k-1 nodes) k-cliques.

    :param g: a networkx/igraph object
    :param k: Size of smallest clique
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.kclique(G, k=3)

    :References:

    Gergely Palla, Imre Derényi, Illés Farkas1, and Tamás Vicsek, `Uncovering the overlapping community structure of complex networks in nature and society <https://www.nature.com/articles/nature03607/>`_ Nature 435, 814-818, 2005, doi:10.1038/nature03607
    """

    g = convert_graph_formats(g, nx.Graph)

    coms = list(nx.algorithms.community.k_clique_communities(g, k))
    coms = [list(x) for x in coms]
    return NodeClustering(coms, g, "Klique", method_parameters={"k": k}, overlap=True)
Exemple #18
0
def walktrap(g):
    """
    walktrap is an approach based on random walks.
    The general idea is that if you perform random walks on the graph, then the walks are more likely to stay within the same community because there are only a few edges that lead outside a given community. Walktrap runs short random walks and uses the results of these random walks to merge separate communities in a bottom-up manner.

    :param g: a networkx/igraph object
    :return: NodeClusterint object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.walktrap(G)

    :References:

    Pons, Pascal, and Matthieu Latapy. `Computing communities in large networks using random walks. <http://jgaa.info/accepted/2006/PonsLatapy2006.10.2.pdf/>`_ J. Graph Algorithms Appl. 10.2 (2006): 191-218.
    """
    g = convert_graph_formats(g, ig.Graph)
    coms = g.community_walktrap().as_clustering()
    communities = []

    for c in coms:
        communities.append([g.vs[x]['name'] for x in c])

    return NodeClustering(communities, g, "Walktrap")
Exemple #19
0
def percomvc(g_original):
    """
    The PercoMVC approach composes of two steps.
    In the first step, the algorithm attempts to determine all communities that the clique percolation algorithm may find.
    In the second step, the algorithm computes the Eigenvector Centrality method on the output of the first step to measure the influence of network nodes and reduce the rate of the unclassified nodes

    :param g_original: a networkx/igraph object
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.percomvc(G)

    :References:

    Kasoro, Nathanaël, et al. "PercoMCV: A hybrid approach of community detection in social networks." Procedia Computer Science 151 (2019): 45-52.

    .. note:: Reference implementation: https://github.com/sedjokas/PercoMCV-Code-source
    """
    g = convert_graph_formats(g_original, nx.Graph)
    communities = percoMVC(g)

    return NodeClustering(communities,
                          g_original,
                          "PercoMVC",
                          method_parameters={},
                          overlap=True)
Exemple #20
0
def eigenvector(g):
    """
    Newman's leading eigenvector method for detecting community structure based on modularity.
    This is the proper internal of the recursive, divisive algorithm: each split is done by maximizing the modularity regarding the original network.

    :param g: a networkx/igraph object
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.eigenvector(G)

    :References:

    Newman, Mark EJ. `Finding community structure in networks using the eigenvectors of matrices. <https://journals.aps.org/pre/pdf/10.1103/PhysRevE.74.036104/>`_ Physical review E 74.3 (2006): 036104.
    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)
    coms = g.community_leading_eigenvector()

    communities = [g.vs[x]['name'] for x in coms]

    return NodeClustering(communities,
                          g,
                          "Eigenvector",
                          method_parameters={"": ""})
Exemple #21
0
def big_clam(g, number_communities=5):
    """
    BigClam is an overlapping community detection method that scales to large networks.
    The model has three main ingredients:
    1)The node community memberships are represented with a bipartite affiliation network that links nodes of the social network to communities that they belong to.
    2)People tend to be involved in communities to various degrees. Therefore,  each affiliation edge in the bipartite affiliation network has a nonnegative weight. The higher the node’s weight of the affiliation to the community the more likely is the node to be connected to other members in the community.
    3)When people share multiple community affiliations, the links between them stem for one dominant reason. This means that for each community a pair of nodes shares we get an independent chance of connecting the nodes. Thus, naturally, the more communities a pair of nodes shares, the higher the probability of being connected.

    :param g: a networkx/igraph object
    :param number_communities: number communities desired, default 5
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.big_clam(G, 2)

    :References:

    Yang, J., & Leskovec, J. (2013, February). `Overlapping community detection at scale: a nonnegative matrix factorization approach. <https://dl.acm.org/citation.cfm?id=2433471/>`_ In Proceedings of the sixth ACM international conference on Web search and data mining (pp. 587-596). ACM.

    .. note:: Reference implementation: https://github.com/RobRomijnders/bigclam
    """

    g = convert_graph_formats(g, nx.Graph)

    communities = BIGCLAM.big_Clam(g, number_communities)

    return NodeClustering(communities, g, "BigClam", method_parameters={"number_communities": number_communities}, overlap=True)
Exemple #22
0
def link_modularity(graph: nx.Graph, communities: object,
                    **kwargs: dict) -> object:
    """
    Quality function designed for directed graphs with overlapping communities.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :return: FitnessResult object

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = evaluation.link_modularity(g,communities)

    :References:

    1. Nicosia, V., Mangioni, G., Carchiolo, V., Malgeri, M.: Extending the definition of modularity to directed graphs with overlapping communities. Journal of Statistical Mechanics: Theory and Experiment 2009(03), 03024 (2009)

    """

    graph = convert_graph_formats(graph, nx.Graph)

    return FitnessResult(score=cal_modularity(graph, communities.communities))
Exemple #23
0
def edges_inside(graph: nx.Graph,
                 community: object,
                 summary: bool = True) -> object:
    """Number of edges internal to the community.

    :param graph: a networkx/igraph object
    :param community: NodeClustering object
    :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = evaluation.edges_inside(g,communities)

    :References:

    1. Radicchi, F., Castellano, C., Cecconi, F., Loreto, V., & Parisi, D. (2004). Defining and identifying communities in networks. Proceedings of the National Academy of Sciences, 101(9), 2658-2663.
    """

    graph = convert_graph_formats(graph, nx.Graph)
    values = []
    for com in community.communities:
        coms = nx.subgraph(graph, com)
        values.append(coms.number_of_edges())

    if summary:
        return FitnessResult(min=min(values),
                             max=max(values),
                             score=np.mean(values),
                             std=np.std(values))
    return values
Exemple #24
0
def __quality_indexes(
    graph: nx.Graph,
    communities: object,
    scoring_function: Callable[[object, object], float],
    summary: bool = True,
) -> object:
    """

    :param graph: NetworkX/igraph graph
    :param communities: NodeClustering object
    :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-communitys ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    """

    graph = convert_graph_formats(graph, nx.Graph)
    values = []
    for com in communities.communities:
        community = nx.subgraph(graph, com)
        values.append(scoring_function(graph, community))

    if summary:
        return FitnessResult(min=min(values),
                             max=max(values),
                             score=np.mean(values),
                             std=np.std(values))
    return values
Exemple #25
0
def newman_girvan_modularity(graph, communities, **kwargs):
    """Difference the fraction of intra community edges of a partition with the expected number of such edges if distributed according to a null model.

    In the standard version of modularity, the null model preserves the expected degree sequence of the graph under consideration. In other words, the modularity compares the real network structure with a corresponding one where nodes are connected without any preference about their neighbors.

    .. math:: Q(S) = \\frac{1}{m}\\sum_{c \\in S}(m_S - \\frac{(2 m_S + l_S)^2}{4m})

    where :math:`m` is the number of graph edges, :math:`m_S` is the number of community edges, :math:`l_S` is the number of edges from nodes in S to nodes outside S.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :return: FitnessResult object


    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = evaluation.newman_girvan_modularity(g,communities)

    :References:

    1. Newman, M.E.J. & Girvan, M. `Finding and evaluating community structure in networks. <https://www.ncbi.nlm.nih.gov/pubmed/14995526/>`_ Physical Review E 69, 26113(2004).
    """

    graph = convert_graph_formats(graph, nx.Graph)
    partition = {}
    for cid, com in enumerate(communities.communities):
        for node in com:
            partition[node] = cid

    return FitnessResult(score=pq.PartitionQuality.community_modularity(partition, graph))
Exemple #26
0
def async_fluid(g, k):
    """
    Fluid Communities (FluidC) is based on the simple idea of fluids (i.e., communities) interacting in an environment (i.e., a non-complete graph), expanding and contracting.
    It is propagation-based algorithm and it allows to specify the number of desired communities (k) and it is asynchronous, where each vertex update is computed using the latest partial state of the graph.


    :param g: a networkx/igraph object
    :param k: Number of communities to search
    :return: EdgeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.async_fluid(G,k=2)


    :References:

    Ferran Parés, Dario Garcia-Gasulla, Armand Vilalta, Jonatan Moreno, Eduard Ayguadé, Jesús Labarta, Ulises Cortés, Toyotaro Suzumura T. `Fluid Communities: A Competitive and Highly Scalable Community Detection Algorithm. <https://link.springer.com/chapter/10.1007/978-3-319-72150-7_19/>`_
    """

    g = convert_graph_formats(g, nx.Graph)

    coms = nx.algorithms.community.asyn_fluidc(g, k)
    coms = [list(x) for x in coms]
    return NodeClustering(coms, g, "Fluid", method_parameters={"k": k})
Exemple #27
0
def label_propagation(g):
    """
    The Label Propagation algorithm (LPA) detects communities using network structure alone.
    The algorithm doesn’t require a pre-defined objective function or prior information about the communities.
    It works as follows:
    -Every node is initialized with a unique label (an identifier)
    -These labels propagate through the network
    -At every iteration of propagation, each node updates its label to the one that the maximum numbers of its neighbours belongs to. Ties are broken uniformly and randomly.
    -LPA reaches convergence when each node has the majority label of its neighbours.

    :param g: a networkx/igraph object
    :return: EdgeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.label_propagation(G)

    :References:

    Raghavan, U. N., Albert, R., & Kumara, S. (2007). `Near linear time algorithm to detect community structures in large-scale networks. <http://www.leonidzhukov.net/hse/2017/networks/papers/raghavan2007.pdf/>`_ Physical review E, 76(3), 036106.
    """

    g = convert_graph_formats(g, nx.Graph)

    coms = list(nx.algorithms.community.label_propagation_communities(g))
    coms = [list(x) for x in coms]

    return NodeClustering(coms,
                          g,
                          "Label Propagation",
                          method_parameters={"": ""})
Exemple #28
0
def greedy_modularity(g, weight=None):
    """
    The CNM algorithm uses the modularity to find the communities strcutures.
    At every step of the algorithm two communities that contribute maximum positive value to global modularity are merged.

    :param g: a networkx/igraph object
    :param weight: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Deafault None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.greedy_modularity(G)

    :References:

    Clauset, A., Newman, M. E., & Moore, C. `Finding community structure in very large networks. <http://ece-research.unm.edu/ifis/papers/community-moore.pdf/>`_ Physical Review E 70(6), 2004
    """
    g = convert_graph_formats(g, nx.Graph)

    coms = nx.algorithms.community.greedy_modularity_communities(g, weight)
    coms = [list(x) for x in coms]
    return NodeClustering(coms,
                          g,
                          "Greedy Modularity",
                          method_parameters={"weight": weight})
Exemple #29
0
def danmf(g_original,
          layers=(32, 8),
          pre_iterations=100,
          iterations=100,
          seed=42,
          lamb=0.01):
    """
    The procedure uses telescopic non-negative matrix factorization in order to learn a cluster memmbership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g_original: a networkx/igraph object
    :param layers: Autoencoder layer sizes in a list of integers. Default [32, 8].
    :param pre_iterations: Number of pre-training epochs. Default 100.
    :param iterations: Number of training epochs. Default 100.
    :param seed: Random seed for weight initializations. Default 42.
    :param lamb: Regularization parameter. Default 0.01.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.danmf(G)

    :References:

    Ye, Fanghua, Chuan Chen, and Zibin Zheng. "Deep autoencoder-like nonnegative matrix factorization for community detection." Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g_original, nx.Graph)
    model = DANMF(layers, pre_iterations, iterations, seed, lamb)

    mapping = {node: i for i, node in enumerate(g.nodes())}
    rev = {i: node for node, i in mapping.items()}
    H = nx.relabel_nodes(g, mapping)

    model.fit(H)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(rev[n])

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms,
                          g_original,
                          "DANMF",
                          method_parameters={
                              "layers": layers,
                              "pre_iteration": pre_iterations,
                              "iterations": iterations,
                              "seed": seed,
                              "lamb": lamb
                          },
                          overlap=True)
Exemple #30
0
def surprise_communities(g,
                         initial_membership=None,
                         weights=None,
                         node_sizes=None):
    """

    Surprise_communities is a model where the quality function to optimize is:

    .. math:: Q = m D(q \\parallel \\langle q \\rangle)

    where :math:`m` is the number of edges,  :math:`q = \\frac{\\sum_c m_c}{m}`,  is the fraction of internal edges, :math:`\\langle q \\rangle = \\frac{\\sum_c \\binom{n_c}{2}}{\\binom{n}{2}}` is the expected fraction of internal edges, and finally

    :math:`D(x \\parallel y) = x \\ln \\frac{x}{y} + (1 - x) \\ln \\frac{1 - x}{1 - y}`  is the binary Kullback-Leibler divergence.

    For directed graphs we can multiplying the binomials by 2, and this leaves :math:`\\langle q \\rangle` unchanged, so that we can simply use the same
    formulation.  For weighted graphs we can simply count the total internal weight instead of the total number of edges for :math:`q` , while :math:`\\langle q \\rangle` remains unchanged.

    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Deafault None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Deafault None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases  this could be changed. Deafault None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.surprise_communities(G)

    :References:

    Traag, V. A., Aldecoa, R., & Delvenne, J.-C. (2015).  `Detecting communities using asymptotical surprise. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.92.022816/>`_ Physical Review E, 92(2), 022816. 10.1103/PhysRevE.92.022816

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.SurpriseVertexPartition,
                                    initial_membership=initial_membership,
                                    weights=weights,
                                    node_sizes=node_sizes)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "Surprise",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "node_sizes": node_sizes
                          })