Example #1
def congo(g_original, number_communities, height=2):
    """
    CONGO (CONGA Optimized) is an optimization of the CONGA algorithm.
    The CONGO algorithm is the same as CONGA but using local betweenness. The complete CONGO algorithm is as follows:

    1. Calculate edge betweenness of edges and split betweenness of vertices.
    2. Find edge with maximum edge betweenness or vertex with maximum split betweenness, if greater.
    3. Recalculate edge betweenness and split betweenness:
        a) Subtract betweenness of h-region centred on the removed edge or split vertex.
        b) Remove the edge or split the vertex.
        c) Add betweenness for the same region.
    4. Repeat from step 2 until no edges remain.

    :param g_original: a networkx/igraph object
    :param number_communities: the number of communities desired
    :param height: The length of the longest shortest paths that CONGO considers, default 2
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.congo(G, number_communities=3, height=2)

    :References:

    Gregory, Steve. `A fast algorithm to find overlapping communities in networks. <https://link.springer.com/chapter/10.1007/978-3-540-87479-9_45/>`_ Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2008.

    .. note:: Reference implementation: https://github.com/Lab41/Circulo/tree/master/circulo/algorithms

    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g_original, ig.Graph)

    communities = Congo_(g, number_communities, height)

    coms = []
    for c in communities:
        coms.append([g.vs[x]['name'] for x in c])

    return NodeClustering(coms,
                          g_original,
                          "Congo",
                          method_parameters={
                              "number_communities": number_communities,
                              "height": height
                          },
                          overlap=True)
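# Illustrative follow-up (a sketch, not part of the library code): inspect the
# overlapping communities returned by congo() above. Only the `communities`
# list exposed by NodeClustering (used throughout these examples) is accessed.
import networkx as nx
from cdlib import algorithms

G_demo = nx.karate_club_graph()
com_demo = algorithms.congo(G_demo, number_communities=3, height=2)
print([len(c) for c in com_demo.communities])  # sizes of the overlapping communities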
Example #2
def PP(
    l: int, k: int, p_in: float, p_out: float, seed: object = 42, directed: bool = False
) -> [object, object]:
    """
    Returns the planted l-partition graph.

    This model partitions a graph with n=l*k vertices into l groups of k vertices each. Vertices of the same group are linked with probability p_in, and vertices of different groups are linked with probability p_out.

    :param l: Number of groups
    :param k: Number of vertices in each group
    :param p_in: probability of connecting vertices within a group
    :param p_out:  probability of connecting vertices between groups
    :param seed: Indicator of random number generation state.
    :param directed: whether to create a directed graph or not. Boolean, default False

    :return: A networkx synthetic graph, the set of communities  (NodeClustering object)

    :Example:

    >>> from cdlib.benchmark import PP
    >>> G, coms = PP(4, 3, 0.5, 0.1, seed=42)

    :References:

    A. Condon, R.M. Karp, Algorithms for graph partitioning on the planted partition model, Random Struct. Algor. 18 (2001) 116-140.
    Santo Fortunato ‘Community Detection in Graphs’ Physical Reports Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612

    .. note:: Reference implementation: https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.planted_partition_graph.html#networkx.generators.community.planted_partition_graph
    """
    from networkx.generators.community import planted_partition_graph

    G = planted_partition_graph(
        l=l, k=k, p_in=p_in, p_out=p_out, seed=seed, directed=directed
    )
    communities = defaultdict(list)
    for n, data in G.nodes(data=True):
        communities[data["block"]].append(n)

    coms = NodeClustering(
        list(communities.values()),
        G,
        "planted_partitions",
        method_parameters={
            "l": l,
            "k": k,
            "p_in": p_in,
            "p_out": p_out,
            "seed": seed,
            "directed": directed,
        },
    )

    return G, coms
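# Sketch (illustration only): empirically check the p_in/p_out description above
# using the underlying networkx generator. With l groups of k nodes, within-group
# pairs should be linked with probability close to p_in and between-group pairs
# close to p_out. The 'block' node attribute is the same one read by PP() above.
import networkx as nx

G_pp = nx.planted_partition_graph(l=4, k=50, p_in=0.5, p_out=0.05, seed=42)
block = nx.get_node_attributes(G_pp, "block")
intra = sum(1 for u, v in G_pp.edges() if block[u] == block[v])
inter = G_pp.number_of_edges() - intra
intra_pairs = 4 * (50 * 49) // 2              # within-group node pairs
inter_pairs = (200 * 199) // 2 - intra_pairs  # between-group node pairs
print(intra / intra_pairs, inter / inter_pairs)  # roughly 0.5 and 0.05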
Example #3
def plot_community_graph(graph,
                         partition,
                         figsize=(8, 8),
                         node_size=200,
                         plot_overlaps=False,
                         plot_labels=False):
    """
        Plot a community-graph with node color coding for communities.

        :param graph: NetworkX/igraph graph
        :param partition: NodeClustering object
        :param figsize: the figure size; it is a pair of float, default (8, 8)
        :param node_size: int, default 200
        :param plot_overlaps: bool, default False. Flag to control if multiple community memberships are plotted.
        :param plot_labels: bool, default False. Flag to control if node labels are plotted.

        Example:

        >>> from cdlib import algorithms, viz
        >>> import networkx as nx
        >>> g = nx.karate_club_graph()
        >>> coms = algorithms.louvain(g)
        >>> viz.plot_community_graph(g, coms)
        """

    cms = partition.communities

    node_to_com = {}
    for cid, com in enumerate(cms):
        for node in com:
            if node not in node_to_com:
                node_to_com[node] = cid
            else:
                # duplicating overlapped node
                alias = "%s_%s" % (node, cid)
                node_to_com[alias] = cid
                edges = [(alias, y) for y in graph.neighbors(node)]
                graph.add_edges_from(edges)

    # handling partial coverage
    s = nx.subgraph(graph, node_to_com.keys())

    # community graph construction
    c_graph = induced_graph(node_to_com, s)
    node_cms = [[node] for node in c_graph.nodes()]

    return plot_network_clusters(c_graph,
                                 NodeClustering(node_cms, None, ""),
                                 nx.spring_layout(c_graph),
                                 figsize=figsize,
                                 node_size=node_size,
                                 plot_overlaps=plot_overlaps,
                                 plot_labels=plot_labels)
Example #4
def infomap(g_original):
    """
    Infomap is based on ideas of information theory.
    The algorithm uses the probability flow of random walks on a network as a proxy for information flows in the real system and it decomposes the network into modules by compressing a description of the probability flow.

    :param g_original: a networkx/igraph object
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.infomap(G)

    :References:

    Rosvall M, Bergstrom CT (2008) `Maps of random walks on complex networks reveal community structure. <https://www.pnas.org/content/105/4/1118/>`_ Proc Natl Acad Sci USA 105(4):1118–1123

    .. note:: Reference implementation: https://pypi.org/project/infomap/
    """

    if imp is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install infomap to use the selected feature.")
    if pipes is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install package wurlitzer to use infomap.")

    g = convert_graph_formats(g_original, nx.Graph)

    g1 = nx.convert_node_labels_to_integers(g, label_attribute="name")
    name_map = nx.get_node_attributes(g1, 'name')
    coms_to_node = defaultdict(list)

    with pipes():
        im = imp.Infomap()
        for e in g1.edges(data=True):
            if len(e) == 3 and 'weight' in e[2]:
                im.addLink(e[0], e[1], e[2]['weight'])
            else:
                im.addLink(e[0], e[1])
        im.run()

        for node in im.iterTree():
            if node.isLeaf():
                nid = node.physicalId
                module = node.moduleIndex()
                nm = name_map[nid]
                coms_to_node[module].append(nm)

    coms_infomap = [list(c) for c in coms_to_node.values()]
    return NodeClustering(coms_infomap, g_original, "Infomap", method_parameters={"": ""})
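# Sketch (illustration only): as the loop above shows, infomap() forwards a
# per-edge 'weight' attribute to im.addLink() when present, otherwise edges are
# added unweighted; attaching uniform weights is equivalent to the unweighted case.
import networkx as nx
from cdlib import algorithms

G_im = nx.karate_club_graph()
nx.set_edge_attributes(G_im, values=1.0, name="weight")
coms_im = algorithms.infomap(G_im)
print(len(coms_im.communities))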
Example #5
def wCommunity(g_original,
               min_bel_degree=0.7,
               threshold_bel_degree=0.7,
               weightName="weight"):
    """
        Algorithm to identify overlapping communities in weighted graphs

        :param g_original: a networkx/igraph object
        :param min_bel_degree: the tolerance, in terms of belonging degree, required in order to add a node in a community
        :param threshold_bel_degree: the tolerance, in terms of belonging degree, required in order to add a node in a 'NLU' community
        :param weightName: name of the edge attribute containing the weights
        :return: NodeClustering object

        :Example:

        >>> from cdlib import algorithms
        >>> import networkx as nx
        >>> G = nx.karate_club_graph()
        >>> nx.set_edge_attributes(G, values=1, name='weight')
        >>> coms = algorithms.wCommunity(G, min_bel_degree=0.6, threshold_bel_degree=0.6)

        :References:

        Chen, D., Shang, M., Lv, Z., & Fu, Y. (2010). Detecting overlapping communities of weighted networks via a local algorithm. Physica A: Statistical Mechanics and its Applications, 389(19), 4177-4187.

        .. note:: Implementation provided by Marco Cardia <*****@*****.**> and Francesco Sabiu <*****@*****.**> (Computer Science Dept., University of Pisa, Italy)
        """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g_original, ig.Graph)
    # Initialization
    comm = weightedCommunity(g,
                             min_bel_degree=min_bel_degree,
                             threshold_bel_degree=threshold_bel_degree,
                             weightName=weightName)
    # Community computation
    comm.computeCommunities()
    # Result
    coms = comm.getCommunities()
    coms = [list(c) for c in coms]
    return NodeClustering(coms,
                          g_original,
                          "wCommunity",
                          method_parameters={
                              "min_bel_degree": min_bel_degree,
                              "threshold_bel_degree": threshold_bel_degree,
                              'weightName': weightName
                          },
                          overlap=True)
Example #6
def rb_pots(g, initial_membership=None, weights=None, resolution_parameter=1):
    """
    Rb_pots is a Leiden model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i k_j}{2m} \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`k_i` is the (weighted) degree of node :math:`i`, :math:`m` is the total number of edges (or total edge weight), :math:`\\sigma_i` denotes the community of node :math:`i` and :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if :math:`\\sigma_i = \\sigma_j` and `0` otherwise.
    For directed graphs a slightly different formulation is used, as proposed by Leicht and Newman:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i^\\mathrm{out} k_j^\\mathrm{in}}{m} \\right)\\delta(\\sigma_i, \\sigma_j),

    where :math:`k_i^\\mathrm{out}` and :math:`k_i^\\mathrm{in}` refers to respectively the outdegree and indegree of node :math:`i` , and :math:`A_{ij}` refers to an edge from :math:`i` to :math:`j`.
    Note that this is the same as the Leiden algorithm when setting :math:`\\gamma=1` and normalising by :math:`2m`, or :math:`m` for directed graphs.


    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rb_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006).  `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_  Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110

    Leicht, E. A., & Newman, M. E. J. (2008).  `Community Structure in Directed Networks. <https://www.ncbi.nlm.nih.gov/pubmed/18517839/>`_  Physical Review Letters, 100(11), 118703. 10.1103/PhysRevLett.100.118703

    """

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.RBConfigurationVertexPartition,
                                    resolution_parameter=resolution_parameter,
                                    initial_membership=initial_membership,
                                    weights=weights)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "RB Pots",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "resolution_parameter": resolution_parameter
                          })
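# Sketch (illustration only) of the resolution_parameter behaviour described in
# the docstring: higher gamma values tend to produce more communities. Exact
# counts depend on the leidenalg optimisation and are not guaranteed.
import networkx as nx
from cdlib import algorithms

G_rb = nx.karate_club_graph()
coarse = algorithms.rb_pots(G_rb, resolution_parameter=0.5)
fine = algorithms.rb_pots(G_rb, resolution_parameter=2.0)
print(len(coarse.communities), len(fine.communities))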
Example #7
def sbm_dl(g, B_min=None, B_max=None, deg_corr=True, **kwargs):
    """Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models.

    Fit a non-overlapping stochastic block model (SBM) by minimizing its description length using an agglomerative heuristic.
    If no parameter is given, the number of blocks will be discovered automatically. Bounds for the number of communities can
    be provided using B_min, B_max.

    :param g: a networkx/igraph object
    :param B_min: minimum number of communities that can be found
    :param B_max: maximum number of communities that can be found
    :param deg_corr: if true, use the degree corrected version of the SBM
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.sbm_dl(G)


    :References:

    Tiago P. Peixoto, “Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models”, Phys. Rev. E 89, 012804 (2014), DOI: 10.1103/PhysRevE.89.012804, arXiv: 1310.4378.

    .. note:: Use implementation from graph-tool library, please refer to https://graph-tool.skewed.de for details
    """
    if gt is None:
        raise Exception(
            "===================================================== \n"
            "The graph-tool library seems not to be installed (or incorrectly installed). \n"
            "Please check installation procedure there https://git.skewed.de/count0/graph-tool/wikis/installation-instructions#native-installation \n"
            "on linux/mac, you can use package managers to do so(apt-get install python3-graph-tool, brew install graph-tool, etc.)"
        )
    gt_g = convert_graph_formats(g, nx.Graph)
    gt_g, label_map = __from_nx_to_graph_tool(gt_g)
    state = gt.minimize_blockmodel_dl(gt_g, B_min, B_max, deg_corr=deg_corr)

    affiliations = state.get_blocks().get_array()
    affiliations = {
        label_map[i]: affiliations[i]
        for i in range(len(affiliations))
    }
    coms = affiliations2nodesets(affiliations)
    coms = [list(v) for k, v in coms.items()]
    return NodeClustering(coms,
                          g,
                          "SBM",
                          method_parameters={
                              "B_min": B_min,
                              "B_max": B_max,
                              "deg_corr": deg_corr
                          })
Example #8
def louvain(g, weight='weight', resolution=1., randomize=False):
    """
    Louvain  maximizes a modularity score for each community.
    The algorithm optimises the modularity in two elementary phases:
    (1) local moving of nodes;
    (2) aggregation of the network.
    In the local moving phase, individual nodes are moved to the community that yields the largest increase in the quality function.
    In the aggregation phase, an aggregate network is created based on the partition obtained in the local moving phase.
    Each community in this partition becomes a node in the aggregate network. The two phases are repeated until the quality function cannot be increased further.

    :param g: a networkx/igraph object
    :param weight: str, optional the key in graph to use as weight. Default to 'weight'
    :param resolution: double, optional  Will change the size of the communities, default to 1.
    :param randomize:  boolean, optional  Will randomize the node evaluation order and the community evaluation  order to get different partitions at each call, default False
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.louvain(G, weight='weight', resolution=1., randomize=False)

    :References:

    Blondel, Vincent D., et al. `Fast unfolding of communities in large networks. <https://iopscience.iop.org/article/10.1088/1742-5468/2008/10/P10008/meta/>`_ Journal of statistical mechanics: theory and experiment 2008.10 (2008): P10008.

    .. note:: Reference implementation: https://github.com/taynaud/python-louvain
    """

    g = convert_graph_formats(g, nx.Graph)

    coms = louvain_modularity.best_partition(g,
                                             weight=weight,
                                             resolution=resolution,
                                             randomize=randomize)

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in coms.items():
        coms_to_node[c].append(n)

    coms_louvain = [list(c) for c in coms_to_node.values()]
    return NodeClustering(coms_louvain,
                          g,
                          "Louvain",
                          method_parameters={
                              "weight": weight,
                              "resolution": resolution,
                              "randomize": randomize
                          })
Example #9
def louvain_to_cdlib_coms(G, coms):
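    """
    Convert a python-louvain partition (a dict mapping node -> community id, as
    returned by ``best_partition`` in Example #8) into a cdlib NodeClustering
    built on the undirected view of ``G``. The ``method_parameters`` value
    ('weight': 'shared_doctors') is specific to the dataset this helper was
    written for.
    """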
    uG = G.to_undirected()
    coms_to_node = defaultdict(list)
    for n, c in coms.items():
        coms_to_node[c].append(n)

    coms_louvain = [list(c) for c in coms_to_node.values()]

    cdlib_coms = NodeClustering(coms_louvain,
                                uG,
                                'louvain',
                                method_parameters={'weight': 'shared_doctors'})
    return cdlib_coms
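# Minimal usage sketch for the helper above (illustration only): it assumes the
# python-louvain package, imported here as community_louvain, whose
# best_partition() returns the node -> community dict expected by `coms`.
import networkx as nx
import community as community_louvain  # python-louvain

G_example = nx.karate_club_graph()
partition = community_louvain.best_partition(G_example)
clustering = louvain_to_cdlib_coms(G_example, partition)
print(len(clustering.communities))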
Example #10
def significance_communities(g, initial_membership=None, node_sizes=None):
    """
    Significance_communities is a model where the quality function to optimize is:

    .. math:: Q = \\sum_c \\binom{n_c}{2} D(p_c \\parallel p)

    where :math:`n_c` is the number of nodes in community :math:`c`, :math:`p_c = \\frac{m_c}{\\binom{n_c}{2}}`, is the density of community :math:`c`,  :math:`p = \\frac{m}{\\binom{n}{2}}`  is the overall density of the graph, and finally  :math:`D(x \\parallel y) = x \\ln \\frac{x}{y} + (1 - x) \\ln \\frac{1 - x}{1 - y}` is the binary Kullback-Leibler divergence.
    For directed graphs simply multiply the binomials by 2. The expected Significance in Erdos-Renyi graphs behaves roughly as :math:`\\frac{1}{2} n \\ln n` for both directed and undirected graphs in this formulation.

    .. warning:: This method is not suitable for weighted graphs.


    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases  this could be changed. Default None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.significance_communities(G)

    :References:

    Traag, V. A., Krings, G., & Van Dooren, P. (2013). `Significant scales in community structure. <https://www.nature.com/articles/srep02930/>`_  Scientific Reports, 3, 2930. `10.1038/srep02930 <http://doi.org/10.1038/srep02930>`_

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.SignificanceVertexPartition,
                                    initial_membership=initial_membership,
                                    node_sizes=node_sizes)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "Significance",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "node_sizes": node_sizes
                          })
Example #11
def RPG(
    sizes: list, p_in: float, p_out: float, seed: object = 42, directed: bool = False
) -> [object, object]:
    """
    Returns the random partition graph with a partition of sizes.

    A partition graph is a graph of communities with sizes defined by s in sizes. Nodes in the same group are connected with probability p_in and nodes of different groups are connected with probability p_out.

    :param sizes: Sizes of groups (list of ints)
    :param p_in: probability of connecting vertices within a group
    :param p_out:  probability of connecting vertices between groups
    :param seed: Indicator of random number generation state.
    :param directed: whether to create a directed graph or not. Boolean, default False

    :return: A networkx synthetic graph, the set of communities  (NodeClustering object)

    :Example:

    >>> from cdlib.benchmark import RPG
    >>> G, coms = RPG([10, 10, 10], 0.25, 0.01)

    :References:

    Santo Fortunato ‘Community Detection in Graphs’ Physical Reports Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612

    .. note:: Reference implementation: https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.random_partition_graph.html#networkx.generators.community.random_partition_graph
    """

    from networkx.generators.community import random_partition_graph

    G = random_partition_graph(
        sizes=sizes, p_in=p_in, p_out=p_out, seed=seed, directed=directed
    )
    communities = defaultdict(list)
    for n, data in G.nodes(data=True):
        communities[data["block"]].append(n)

    coms = NodeClustering(
        list(communities.values()),
        G,
        "RPG",
        method_parameters={
            "sizes": sizes,
            "p_in": p_in,
            "p_out": p_out,
            "seed": seed,
            "directed": directed,
        },
    )

    return G, coms
Example #12
def node_perception(g_original, threshold, overlap_threshold, min_comm_size=3):
    """Node perception is based on the idea of joining together small sets of nodes.
    The algorithm first identifies sub-communities corresponding to each node’s perception of the network around it.
    To perform this step, it considers each node individually, and partitions that node’s neighbors into communities using some existing community detection method.
    Next, it creates a new network in which every node corresponds to a sub-community, and two nodes are linked if their associated sub-communities overlap by at least some threshold amount.
    Finally, the algorithm identifies overlapping communities in this new network, and for every such community, merges together the associated sub-communities to identify communities in the original network.

    :param g_original: a networkx/igraph object
    :param threshold: the tolerance required in order to merge communities
    :param overlap_threshold: the overlap tolerance
    :param min_comm_size: minimum community size default 3
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.node_perception(G, threshold=0.25, overlap_threshold=0.25)

    :References:

    Sucheta Soundarajan and John E. Hopcroft. 2015. `Use of Local Group Information to Identify Communities in Networks. <https://dl.acm.org/citation.cfm?id=2737800.2700404/>`_ ACM Trans. Knowl. Discov. Data 9, 3, Article 21 (April 2015), 27 pages. DOI=http://dx.doi.org/10.1145/2700404

    """
    g = convert_graph_formats(g_original, nx.Graph)
    tp = type(list(g.nodes())[0])

    with suppress_stdout():
        np = NodePerception(g,
                            sim_threshold=threshold,
                            overlap_threshold=overlap_threshold,
                            min_comm_size=min_comm_size)
        coms = np.execute()
        if tp != str:
            communities = []
            for c in coms:
                c = list(map(tp, c))
                communities.append(c)
            coms = communities

    return NodeClustering(coms,
                          g_original,
                          "Node Perception",
                          method_parameters={
                              "threshold": threshold,
                              "overlap_threshold": overlap_threshold,
                              "min_com_size": min_comm_size
                          },
                          overlap=True)
Example #13
def big_clam(g_original, dimensions=8, iterations=50, learning_rate=0.005):
    """
    BigClam is an overlapping community detection method that scales to large networks.
    The procedure uses gradient ascent to create an embedding which is used for deciding the node-cluster affiliations.

    :param g_original: a networkx/igraph object
    :param dimensions: Number of embedding dimensions. Default 8.
    :param iterations: Number of training iterations. Default 50.
    :param learning_rate: Gradient ascent learning rate. Default is 0.005.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.big_clam(G)

    :References:

    Yang, Jaewon, and Jure Leskovec. "Overlapping community detection at scale: a nonnegative matrix factorization approach." Proceedings of the sixth ACM international conference on Web search and data mining. 2013.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """

    g = convert_graph_formats(g_original, nx.Graph)

    model = BigClam(dimensions=dimensions,
                    iterations=iterations,
                    learning_rate=learning_rate)
    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(n)

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms,
                          g_original,
                          "BigClam",
                          method_parameters={
                              "dimensions": dimensions,
                              "iterations": iterations,
                              "learning_rate": learning_rate
                          },
                          overlap=True)
Example #14
def angel(g_original, threshold, min_community_size=3):
    """
    Angel is a node-centric bottom-up community discovery algorithm.
    It leverages ego-network structures and overlapping label propagation to identify micro-scale communities that are subsequently merged in mesoscale ones.
    Angel is the faster successor of Demon.

    :param g_original: a networkx/igraph object
    :param threshold: merging threshold in [0,1].
    :param min_community_size: minimum community size, default 3.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.angel(G, threshold=0.25, min_community_size=3)

    :References:

    1. Rossetti, Giulio. "Exorcising the Demon: Angel, Efficient Node-Centric Community Discovery." International Conference on Complex Networks and Their Applications. Springer, Cham, 2019.

    .. note:: Reference implementation: https://github.com/GiulioRossetti/ANGEL
    """
    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )
    if Angel is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install angel-cd library to use the selected feature (likely pip install angel-cd). If using a notebook, you need also to restart your runtime/kernel."
        )

    g = convert_graph_formats(g_original, ig.Graph)
    with suppress_stdout():
        a = Angel(graph=g,
                  min_comsize=min_community_size,
                  threshold=threshold,
                  save=False)
        coms = a.execute()

    return NodeClustering(list(coms.values()),
                          g_original,
                          "ANGEL",
                          method_parameters={
                              "threshold": threshold,
                              "min_community_size": min_community_size
                          },
                          overlap=True)
Example #15
def conga(g_original, number_communities):
    """
    CONGA (Cluster-Overlap Newman Girvan Algorithm) is an algorithm for discovering overlapping communities.
    It extends Girvan and Newman’s algorithm with a specific method of deciding when and how to split vertices. The algorithm is as follows:

    1. Calculate edge betweenness of all edges in network.
    2. Calculate vertex betweenness of vertices, from edge betweennesses.
    3. Find candidate set of vertices: those whose vertex betweenness is greater than the maximum edge betweenness.
    4. If candidate set is non-empty, calculate pair betweennesses of candidate vertices, and then calculate split betweenness of candidate vertices.
    5. Remove edge with maximum edge betweenness or split vertex with maximum split betweenness (if greater).
    6. Recalculate edge betweenness for all remaining edges in same component(s) as removed edge or split vertex.
    7. Repeat from step 2 until no edges remain.

    :param g_original: a networkx/igraph object
    :param number_communities: the number of communities desired
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.conga(G, number_communities=3)

    :References:

    Gregory, Steve. `An algorithm to find overlapping community structure in networks. <https://link.springer.com/chapter/10.1007/978-3-540-74976-9_12/>`_ European Conference on Principles of Data Mining and Knowledge Discovery. Springer, Berlin, Heidelberg, 2007.

    .. note:: Reference implementation: https://github.com/Lab41/Circulo/tree/master/circulo/algorithms
    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g_original, ig.Graph)

    communities = Conga_(g, number_communities=number_communities)
    coms = []
    for c in communities:
        coms.append([g.vs[x]['name'] for x in c])

    return NodeClustering(
        coms,
        g_original,
        "Conga",
        method_parameters={"number_communities": number_communities},
        overlap=True)
Example #16
def der(g, walk_len=3, threshold=.00001, iter_bound=50):
    """
    DER is a Diffusion Entropy Reducer graph clustering algorithm.
    The algorithm uses random walks to embed the graph in a space of measures, after which a modification of k-means in that space is applied. It creates the walks, creates an initialization, runs the algorithm,
    and finally extracts the communities.

    :param g: an undirected networkx graph object
    :param walk_len: length of the random walk, default 3
    :param threshold: threshold for the stop criterion; if the likelihood difference is less than the threshold the algorithm stops, default 0.00001
    :param iter_bound: maximum number of iteration, default 50
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.der(G, 3, .00001, 50)


    :References:

    M. Kozdoba and S. Mannor, `Community Detection via Measure Space Embedding <https://papers.nips.cc/paper/5808-community-detection-via-measure-space-embedding/>`_, NIPS 2015

    .. note:: Reference implementation: https://github.com/komarkdev/der_graph_clustering
    """

    graph = convert_graph_formats(g, nx.Graph)

    communities, _ = DER.der_graph_clustering(graph,
                                              walk_len=walk_len,
                                              alg_threshold=threshold,
                                              alg_iterbound=iter_bound)

    maps = {k: v for k, v in enumerate(graph.nodes())}
    coms = []
    for c in communities:
        coms.append([maps[n] for n in c])

    return NodeClustering(coms,
                          graph,
                          "DER",
                          method_parameters={
                              "walk_len": walk_len,
                              "threshold": threshold,
                              "iter_bound": iter_bound
                          })
Example #17
def read_community_json(path: str, compress: bool = False) -> object:
    """
    Read community list from JSON file.

    :param path: input filename
    :param compress: whether the file is in a compressed (gzip) format, default False
    :return: a Clustering object

    :Example:

    >>> import networkx as nx
    >>> from cdlib import algorithms, readwrite
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> readwrite.write_community_json(coms, "communities.json")
    >>> readwrite.read_community_json("communities.json")
    """

    if compress:
        op = gzip.open
    else:
        op = open

    with op(path, "rt") as f:
        coms = json.load(f)

    nc = NodeClustering(
        [list(c) for c in coms["communities"]],
        None,
        coms["algorithm"],
        coms["params"],
        coms["overlap"],
    )
    nc.node_coverage = coms["coverage"]

    if "allocation_matrix" in coms:
        nc.__class__ = FuzzyNodeClustering
        nc.allocation_matrix = coms["allocation_matrix"]

    if type(nc.communities[0][0]) is list:
        cms = []
        for c in nc.communities:
            cm = []
            for e in c:
                cm.append(tuple(e))
            cms.append(list(cm))
        nc.communities = cms
        nc.__class__ = EdgeClustering

    return nc
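# Round-trip sketch (illustration only), pairing write_community_json from the
# docstring example above with read_community_json:
import networkx as nx
from cdlib import algorithms, readwrite

g_rw = nx.karate_club_graph()
coms_rw = algorithms.louvain(g_rw)
readwrite.write_community_json(coms_rw, "communities.json")
loaded = readwrite.read_community_json("communities.json")
print(loaded.communities == coms_rw.communities)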
Example #18
def lemon(graph, seeds, min_com_size=20, max_com_size=50, expand_step=6, subspace_dim=3, walk_steps=3, biased=False):
    """Lemon is a large scale overlapping community detection method based on local expansion via minimum one norm.

    The algorithm adopts a local expansion method in order to identify the community members from a few exemplary seed members.
    The algorithm finds the community by seeking a sparse vector in the span of the local spectra such that the seeds are in its support. LEMON can achieve the highest detection accuracy among state-of-the-art proposals. The running time depends on the size of the community rather than that of the entire graph.

    :param graph: a networkx/igraph object
    :param seeds: Node list
    :param min_com_size: the minimum size of a single community in the network, default 20
    :param max_com_size: the maximum size of a single community in the network, default 50
    :param expand_step: the step size of the seed set increase during the expansion process, default 6
    :param subspace_dim: dimension of the subspace; choosing a large dimension is undesirable because it would increase the computation cost of generating local spectra, default 3
    :param walk_steps: the number of steps of the random walk, default 3
    :param biased: boolean; if True, use a biased random walk starting from the seed nodes, default False
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> seeds = ["$0$", "$2$", "$3$"]
    >>> coms = algorithms.lemon(G, seeds, min_com_size=2, max_com_size=5)

    :References:

    Yixuan Li, Kun He, David Bindel, John Hopcroft `Uncovering the small community structure in large networks: A local spectral approach. <https://dl.acm.org/citation.cfm?id=2736277.2741676/>`_ Proceedings of the 24th international conference on world wide web. International World Wide Web Conferences Steering Committee, 2015.

    .. note:: Reference implementation: https://github.com/YixuanLi/LEMON
    """

    graph = convert_graph_formats(graph, nx.Graph)
    graph_m = nx.convert_matrix.to_numpy_array(graph)

    node_to_pos = {n: p for p, n in enumerate(graph.nodes())}
    pos_to_node = {p: n for n, p in node_to_pos.items()}

    seeds = np.array([node_to_pos[s] for s in seeds])

    community = LEMON.lemon(graph_m, seeds, min_com_size, max_com_size, expand_step,
                            subspace_dim=subspace_dim, walk_steps=walk_steps, biased=biased)

    return NodeClustering([[pos_to_node[n] for n in community]], graph,
                          "LEMON", method_parameters=dict(seeds=str(list(seeds)), min_com_size=min_com_size,
                                                          max_com_size=max_com_size, expand_step=expand_step,
                                                          subspace_dim=subspace_dim, walk_steps=walk_steps,
                                                          biased=biased), overlap=True)
Example #19
def rber_pots(g_original, initial_membership=None, weights=None, node_sizes=None, resolution_parameter=1):
    """
    rber_pots is a  model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma p \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix,  :math:`p = \\frac{m}{\\binom{n}{2}}` is the overall density of the graph, :math:`\\sigma_i` denotes the community of node :math:`i`, :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if  :math:`\\sigma_i = \\sigma_j` and `0` otherwise, and, finally :math:`\\gamma` is a resolution parameter.


    :param g_original: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases  this could be changed. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rber_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006).  `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_  Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110


    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install igraph to use the selected feature.")

    g = convert_graph_formats(g_original, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.RBERVertexPartition,
                                    resolution_parameter=resolution_parameter,
                                    initial_membership=initial_membership, weights=weights,
                                    node_sizes=node_sizes,
                                    )
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g_original, "RBER Pots", method_parameters={"initial_membership": initial_membership,
                                                                   "weights": weights, "node_sizes": node_sizes,
                                                                   "resolution_parameter": resolution_parameter})
Example #20
def copra(graph, param) -> NodeClustering:
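    """
    Run the external COPRA implementation (copra.jar, invoked via IPython shell
    commands) on `graph` and wrap its cluster file in a NodeClustering.
    `param` is forwarded to COPRA's -v option. The relative jar/output paths
    below are specific to the original author's environment.
    """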
    inputFile = 'copra_input.txt'
    z = get_ipython().getoutput(f'rm -f {inputFile} || true')
    nx.write_edgelist(graph, inputFile,  data=False)
    outputDir = "COPRA"
    outputFile = outputDir + "/" + "clusters-" + inputFile.split('/')[-1]
    get_ipython().system(f'rm -rf {outputDir}')
    get_ipython().system(f'mkdir {outputDir}')
    cmd = f'(cd {outputDir} && java -cp ../../related_methods/OSLOM2/copra.jar COPRA ../{inputFile} -v {param} -repeat 100 -mo -nosplit)'
    # print(f'running: [{cmd}]')
    tmp = get_ipython().getoutput(cmd)
    com = []
    with open(outputFile) as f:
        lines = f.readlines()
        com = [[int(a) for a in line.split()] for line in lines]

    return NodeClustering(communities=com, graph=graph, method_name='copra', method_parameters=param, overlap=isOverlap(com))
Example #21
def angel(g, threshold, min_community_size=3):
    """
    Angel is a node-centric bottom-up community discovery algorithm.
    It leverages ego-network structures and overlapping label propagation to identify micro-scale communities that are subsequently merged in mesoscale ones.
    Angel is the faster successor of Demon.

    :param g: a networkx/igraph object
    :param threshold: merging threshold in [0,1].
    :param min_community_size: minimum community size, default 3.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.angel(G, threshold=0.25, min_community_size=3)

    :References:

    1. Rossetti G. **Angel: efficient, and effective, node-centric community discovery in static and dynamic networks.**

    .. note:: Reference implementation: https://github.com/GiulioRossetti/ANGEL
    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)
    with suppress_stdout():
        a = Angel(graph=g,
                  min_comsize=min_community_size,
                  threshold=threshold,
                  save=False)
        coms = a.execute()

    return NodeClustering(list(coms.values()),
                          g,
                          "ANGEL",
                          method_parameters={
                              "threshold": threshold,
                              "min_community_size": min_community_size
                          },
                          overlap=True)
Example #22
def leiden(g, initial_membership=None, weights=None):
    """
    The Leiden algorithm is an improvement of the Louvain algorithm.
    The Leiden algorithm consists of three phases:
    (1) local moving of nodes,
    (2) refinement of the partition
    (3) aggregation of the network based on the refined partition, using the non-refined partition to create an initial partition for the aggregate network.

    :param g: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.leiden(G)

    :References:

    Traag, Vincent, Ludo Waltman, and Nees Jan van Eck. `From Louvain to Leiden: guaranteeing well-connected communities. <https://arxiv.org/abs/1810.08473/>`_ arXiv preprint arXiv:1810.08473 (2018).

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg
    """

    if ig is None or leidenalg is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph and leidenalg to use the "
            "selected feature.")

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g,
                                    leidenalg.ModularityVertexPartition,
                                    initial_membership=initial_membership,
                                    weights=weights)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms,
                          g,
                          "Leiden",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights
                          })
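# Sketch (illustration only): per the parameter description above, `weights`
# may name an edge attribute, assuming the attribute survives the
# networkx -> igraph conversion performed by convert_graph_formats.
import networkx as nx
from cdlib import algorithms

G_ld = nx.karate_club_graph()
nx.set_edge_attributes(G_ld, values=1.0, name="weight")
coms_ld = algorithms.leiden(G_ld, weights="weight")
print(len(coms_ld.communities))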
Example #23
def danmf(g_original, layers=(32, 8), pre_iterations=100, iterations=100, seed=42, lamb=0.01):
    """
    The procedure uses telescopic non-negative matrix factorization in order to learn a cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g_original: a networkx/igraph object
    :param layers: Autoencoder layer sizes in a list of integers. Default [32, 8].
    :param pre_iterations: Number of pre-training epochs. Default 100.
    :param iterations: Number of training epochs. Default 100.
    :param seed: Random seed for weight initializations. Default 42.
    :param lamb: Regularization parameter. Default 0.01.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.danmf(G)

    :References:

    Ye, Fanghua, Chuan Chen, and Zibin Zheng. "Deep autoencoder-like nonnegative matrix factorization for community detection." Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g_original, nx.Graph)
    model = DANMF(layers, pre_iterations, iterations, seed, lamb)

    mapping = {node: i for i, node in enumerate(g.nodes())}
    rev = {i: node for node,  i in mapping.items()}
    H = nx.relabel_nodes(g, mapping)

    model.fit(H)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(rev[n])

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms, g_original, "DANMF", method_parameters={"layers": layers, "pre_iteration": pre_iterations,
                                                               "iterations": iterations, "seed": seed, "lamb": lamb},
                          overlap=True)
Example #24
def nmnf(g, dimensions=128, clusters=10, lambd=0.2, alpha=0.05, beta=0.05, iterations=200, lower_control=1e-15, eta=5.0):
    """
    The procedure uses joint non-negative matrix factorization with modularity-based regularization in order to learn a cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g: a networkx/igraph object
    :param dimensions: Number of dimensions. Default is 128.
    :param clusters: Number of clusters. Default is 10.
    :param lambd: KKT penalty. Default is 0.2
    :param alpha: Clustering penalty. Default is 0.05.
    :param beta: Modularity regularization penalty. Default is 0.05.
    :param iterations:  Number of power iterations. Default is 200.
    :param lower_control: Floating point overflow control. Default is 10**-15.
    :param eta: Similarity mixing parameter. Default is 5.0.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.nmnf(G)

    :References:

    Wang, Xiao, et al. "Community preserving network embedding." Thirty-first AAAI conference on artificial intelligence. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g, nx.Graph)
    model = MNMF(dimensions=dimensions, clusters=clusters, lambd=lambd, alpha=alpha, beta=beta, iterations=iterations,
                 lower_control=lower_control, eta=eta)
    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(n)

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms, g, "MNMF", method_parameters={"dimension": dimensions, "clusters": clusters,
                                                              "lambd": lambd, "alpha": alpha, "beta": beta,
                                                              "iterations": iterations, "lower_control": lower_control,
                                                              "eta": eta}, overlap=True)
Example #25
def egonet_splitter(g_original, resolution=1.0):
    """
    The method first creates the egonets of nodes. A persona-graph is created which is clustered by the Louvain method.

    :param g_original: a networkx/igraph object
    :param resolution: Resolution parameter of Python Louvain. Default 1.0.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.egonet_splitter(G)

    :References:

    Epasto, Alessandro, Silvio Lattanzi, and Renato Paes Leme. "Ego-splitting framework: From non-overlapping to overlapping clusters." Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g_original, nx.Graph)
    model = EgoNetSplitter(resolution=resolution)

    mapping = {node: i for i, node in enumerate(g.nodes())}
    rev = {i: node for node, i in mapping.items()}
    H = nx.relabel_nodes(g, mapping)

    model.fit(H)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, cs in members.items():
        for c in cs:
            coms_to_node[c].append(rev[n])

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms,
                          g_original,
                          "EgoNetSplitter",
                          method_parameters={"resolution": resolution},
                          overlap=True)
Example #26
def nnsed(g_original, dimensions=32, iterations=10, seed=42):
    """
    The procedure uses non-negative matrix factorization in order to learn an unnormalized cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g_original: a networkx/igraph object
    :param dimensions: Embedding layer size. Default is 32.
    :param iterations: Number of training epochs. Default 10.
    :param seed:  Random seed for weight initializations. Default 42.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.nnsed(G)

    :References:

    Sun, Bing-Jie, et al. "A non-negative symmetric encoder-decoder approach for community detection." Proceedings of the 2017 ACM on Conference on Information and Knowledge Management. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g_original, nx.Graph)
    model = NNSED(dimensions=dimensions, iterations=iterations, seed=seed)
    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(n)

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms,
                          g_original,
                          "NNSED",
                          method_parameters={
                              "dimension": dimensions,
                              "iterations": iterations,
                              "seed": seed
                          },
                          overlap=True)
Example #27
def surprise_communities(g_original, initial_membership=None, weights=None, node_sizes=None):
    """

    Surprise_communities is a model where the quality function to optimize is:

    .. math:: Q = m D(q \\parallel \\langle q \\rangle)

    where :math:`m` is the number of edges, :math:`q = \\frac{\\sum_c m_c}{m}` is the fraction of internal edges, :math:`\\langle q \\rangle = \\frac{\\sum_c \\binom{n_c}{2}}{\\binom{n}{2}}` is the expected fraction of internal edges, and finally :math:`D(x \\parallel y) = x \\ln \\frac{x}{y} + (1 - x) \\ln \\frac{1 - x}{1 - y}` is the binary Kullback-Leibler divergence.

    For directed graphs we simply multiply the binomials by 2; this leaves :math:`\\langle q \\rangle` unchanged, so that we can use the same
    formulation. For weighted graphs we can simply count the total internal weight instead of the total number of edges for :math:`q`, while :math:`\\langle q \\rangle` remains unchanged.

    :param g_original: a networkx/igraph object
    :param initial_membership:  list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases  this could be changed. Default None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.surprise_communities(G)

    :References:

    Traag, V. A., Aldecoa, R., & Delvenne, J.-C. (2015).  `Detecting communities using asymptotical surprise. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.92.022816/>`_ Physical Review E, 92(2), 022816. 10.1103/PhysRevE.92.022816

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg

    """

    if ig is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install igraph to use the selected feature.")

    g = convert_graph_formats(g_original, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.SurpriseVertexPartition, initial_membership=initial_membership,
                                    weights=weights, node_sizes=node_sizes)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g_original, "Surprise", method_parameters={"initial_membership": initial_membership,
                                                                  "weights": weights, "node_sizes": node_sizes})
Example #28
def oslom(graph, t=0.5, cp=0.5, seed=13) -> NodeClustering:
    '''
    graph - NetworkX
    t - threshold
    cp  - xxx?
    seed - random seed
    '''
    inputFile = 'oslom_tmp_input.txt'
    z = get_ipython().getoutput(f'rm -f {inputFile} || true')
    z = get_ipython().getoutput('rm -f tp || true')
    z = get_ipython().getoutput('rm -rf oslom_tmp_input.txt_oslo_files || true')
    nx.write_edgelist(graph, inputFile, data=False)
    param = f'-t {t} -cp {cp} -seed {seed}'
    cmd = f'../related_methods/OSLOM2/oslom_undir -f {inputFile} -uw {param}'
    tmp = get_ipython().getoutput(cmd)
    lines = get_ipython().getoutput('cat oslom_tmp_input.txt_oslo_files/partitions_level_0')
    com = [ [int(v) for v in line.split()] for line in lines if not line.startswith('#')]
    return NodeClustering(communities=com, graph=graph, method_name='oslom', method_parameters=param, overlap=isOverlap(com))
def add_communities_to_graph(clustering: NodeClustering):
    """
    Assign community labels to nodes of the graph, propagating community labels
    from higher levels down the tree.
    """
    community_attrs = {}
    cluster_object_attrs = {}
    hG = hierarchy_graph(clustering.graph)
    for node_key, community_ids in clustering.to_node_community_map().items():
        node_with_descendants = [node_key
                                 ] + [n for n in nx.descendants(hG, node_key)]
        for node in node_with_descendants:
            community_attrs[node] = community_ids
        cluster_object_attrs[node_key] = True

    nx.set_node_attributes(clustering.graph, community_attrs, "communities")
    nx.set_node_attributes(clustering.graph, cluster_object_attrs,
                           "clusterobject")
Example #30
def multicom(g_original, seed_node):
    """
    MULTICOM is an algorithm for detecting multiple local communities, possibly overlapping, by expanding the initial seed set.
    This algorithm uses local scoring metrics to define an embedding of the graph around the seed set. Based on this embedding, it picks new seeds in the neighborhood of the original seed set, and uses these new seeds to recover multiple communities.

    :param g_original: a networkx/igraph object
    :param seed_node: Id of the seed node around which we want to detect communities.
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.multicom(G, seed_node=0)

    :References:

    Hollocou, Alexandre, Thomas Bonald, and Marc Lelarge. `Multiple Local Community Detection. <https://hal.archives-ouvertes.fr/hal-01625444/document/>`_ ACM SIGMETRICS Performance Evaluation Review 45.2 (2018): 76-83.

    .. note:: Reference implementation: https://github.com/ahollocou/multicom

    """

    g = convert_graph_formats(g_original, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    mc = MultiCom(g)
    coms = mc.execute(seed_node)

    if maps is not None:
        communities = []
        for c in coms:
            communities.append([maps[n] for n in c])
        nx.relabel_nodes(g, maps, False)
    else:
        communities = [list(c) for c in coms]

    return NodeClustering(communities,
                          g_original,
                          "Multicom",
                          method_parameters={"seeds": seed_node},
                          overlap=True)