Example #1
 def new_func(*args, **kw):
     """Validate ``(G, partition)`` and then delegate to the wrapped ``func``.

     The first two positional arguments are assumed to be a graph and a
     partition of its nodes, in that order; all arguments are forwarded
     unchanged to ``func`` once the check passes.

     Raises
     ------
     nx.NetworkXError
         If ``is_partition`` rejects the first two positional arguments.
     """
     # Here we assume that the first two arguments are (G, partition).
     if not is_partition(*args[:2]):
         raise nx.NetworkXError(
             "`partition` is not a valid partition of the nodes of G"
         )
     return func(*args, **kw)
Example #2
def _require_partition(G, partition):
    """Check that a valid partition is input to a function.

    Returns ``(G, partition)`` unchanged when `partition` is a valid
    partition of the nodes of `G`.

    Raises :exc:`networkx.NetworkXError` if the partition is not valid.

    This check is meant for functions whose first two arguments are a
    graph and a partition of the nodes of that graph (in that order).
    The example below assumes it is exposed as a decorator named
    ``require_partition`` (not defined in this function)::

        >>> @require_partition
        ... def foo(G, partition):
        ...     print("partition is valid!")
        ...
        >>> G = nx.complete_graph(5)
        >>> partition = [{0, 1}, {2, 3}, {4}]
        >>> foo(G, partition)
        partition is valid!
        >>> partition = [{0}, {2, 3}, {4}]
        >>> foo(G, partition)
        Traceback (most recent call last):
          ...
        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
        >>> partition = [{0, 1}, {1, 2, 3}, {4}]
        >>> foo(G, partition)
        Traceback (most recent call last):
          ...
        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G

    """
    if is_partition(G, partition):
        return G, partition
    raise nx.NetworkXError(
        "`partition` is not a valid partition of the nodes of G")
Example #3
Example #4
Example #5
def test_generator():
    """LFR benchmark graph has 250 nodes and its communities partition them."""
    graph = LFR_benchmark_graph(
        250,   # n
        3,     # tau1
        1.5,   # tau2
        0.1,   # mu
        average_degree=5,
        min_community=20,
        seed=10,
    )
    assert_equal(len(graph), 250)

    # Collect the distinct 'community' node attributes; frozenset makes each
    # community hashable so duplicates collapse.
    communities = set()
    for node in graph:
        communities.add(frozenset(graph.nodes[node]['community']))
    assert_true(is_partition(graph.nodes(), communities))
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
    """Split the nodes of `G` into two sets, refined by Kernighan-Lin sweeps.

    Parameters
    ----------
    G : graph
        Must support ``len``, iteration over nodes, ``G[v].items()`` and
        ``is_multigraph()``.
    partition : pair of iterables, optional
        Initial two-way partition of the nodes. When None, the first
        ``n // 2`` nodes (in iteration order) start on side 0 and the rest
        on side 1.
    max_iter : int
        Maximum number of sweeps; the loop stops early as soon as a sweep
        offers no negative-cost prefix of swaps.
    weight : key
        Edge data key holding the weight; edges without it count as 1.
    seed : object
        Not referenced in this body.
        NOTE(review): presumably consumed by a decorator outside this
        block -- confirm.

    Returns
    -------
    (A, B) : tuple of sets
        Nodes finally on side 0 and on side 1 respectively.

    Raises
    ------
    nx.NetworkXError
        If `partition` cannot be unpacked into two items, or if
        ``is_partition`` rejects it.
    """
    n = len(G)
    labels = list(G)
    index = {v: i for i, v in enumerate(labels)}

    if partition is None:
        side = [0] * (n // 2) + [1] * ((n + 1) // 2)
    else:
        try:
            A, B = partition
        except (TypeError, ValueError) as e:
            raise nx.NetworkXError("partition must be two sets") from e
        if not is_partition(G, (A, B)):
            raise nx.NetworkXError("partition invalid")
        # Nodes of the first set are marked side 1, everything else side 0.
        side = [0] * n
        for a in A:
            side[index[a]] = 1

    # Per-node adjacency as (neighbour index, weight) pairs, aligned with
    # `labels`.
    if G.is_multigraph():
        # Parallel edges are collapsed by summing their weights.
        edges = [
            [
                (index[u], sum(e.get(weight, 1) for e in d.values()))
                for u, d in G[v].items()
            ]
            for v in labels
        ]
    else:
        edges = [
            [(index[u], e.get(weight, 1)) for u, e in G[v].items()] for v in labels
        ]

    for _sweep in range(max_iter):
        costs = list(_kernighan_lin_sweep(edges, side))
        min_cost, min_i, _ = min(costs)
        if min_cost >= 0:
            # No prefix of swaps improves the cut any further.
            break

        # Apply every swap up to and including the best prefix.
        for _, _, (u, v) in costs[: min_i + 1]:
            side[u] = 1
            side[v] = 0

    A = {u for u, s in zip(labels, side) if s == 0}
    B = {u for u, s in zip(labels, side) if s == 1}
    return A, B
Example #7
Example #8
Example #9
Example #10
 def new_func(*args, **kw):
     """Forward the call to ``func`` once ``(G, partition)`` is validated."""
     # The leading two positional arguments are taken to be (G, partition).
     graph_and_partition = args[:2]
     if is_partition(*graph_and_partition):
         return func(*args, **kw)
     raise nx.NetworkXError(
         '`partition` is not a valid partition of the nodes of G')
Example #11
Example #12
Example #13
def modularity(G, communities, weight="weight", resolution=1):
    r"""Returns the modularity of the given partition of the graph.

    Modularity is defined in [1]_ as

    .. math::
        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
            \delta(c_i,c_j)

    where $m$ is the number of edges, $A$ is the adjacency matrix of `G`,
    $k_i$ is the degree of $i$, $\gamma$ is the resolution parameter,
    and $\delta(c_i, c_j)$ is 1 if $i$ and $j$ are in the same community else 0.

    According to [2]_ (and verified by some algebra) this can be reduced to

    .. math::
       Q = \sum_{c=1}^{n}
       \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]

    where the sum iterates over all communities $c$, $m$ is the number of edges,
    $L_c$ is the number of intra-community links for community $c$,
    $k_c$ is the sum of degrees of the nodes in community $c$,
    and $\gamma$ is the resolution parameter.

    The resolution parameter sets an arbitrary tradeoff between intra-group
    edges and inter-group edges. More complex grouping patterns can be
    discovered by analyzing the same network with multiple values of gamma
    and then combining the results [3]_. That said, it is very common to
    simply use gamma=1. More on the choice of gamma is in [4]_.

    The second formula is the one actually used in calculation of the modularity.
    For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.

    Parameters
    ----------
    G : NetworkX Graph

    communities : list or iterable of set of nodes
        These node sets must represent a partition of G's nodes.

    weight : string or None, optional (default="weight")
        The edge attribute that holds the numerical value used
        as a weight. If None or an edge does not have that attribute,
        then that edge has weight 1.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Returns
    -------
    Q : float
        The modularity of the partition.

    Raises
    ------
    NotAPartition
        If `communities` is not a partition of the nodes of `G`.

    Examples
    --------
    >>> import networkx.algorithms.community as nx_comm
    >>> G = nx.barbell_graph(3, 0)
    >>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
    0.35714285714285715
    >>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
    0.35714285714285715

    References
    ----------
    .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
       Oxford University Press, 2011.
    .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
       "Finding community structure in very large networks."
       Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
       Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
    .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
       maximum likelihood methods for community detection"
       Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
    """
    # Materialise once: `communities` is traversed both by the validity
    # check and by the final sum, which a one-shot iterator could not survive.
    if not isinstance(communities, list):
        communities = list(communities)
    if not is_partition(G, communities):
        raise NotAPartition(G, communities)

    directed = G.is_directed()
    if directed:
        out_degree = dict(G.out_degree(weight=weight))
        in_degree = dict(G.in_degree(weight=weight))
        m = sum(out_degree.values())
        norm = 1 / m**2
    else:
        # Undirected: in- and out-degree are the same mapping, and the
        # degree sum counts every edge twice.
        out_degree = in_degree = dict(G.degree(weight=weight))
        deg_sum = sum(out_degree.values())
        m = deg_sum / 2
        norm = 1 / deg_sum**2

    def community_contribution(community):
        """Return the term of the second formula for one community."""
        comm = set(community)
        # Total weight of edges whose reported endpoint `v` is also inside
        # the community.
        L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1)
                  if v in comm)

        out_degree_sum = sum(out_degree[u] for u in comm)
        in_degree_sum = sum(in_degree[u]
                            for u in comm) if directed else out_degree_sum

        return L_c / m - resolution * out_degree_sum * in_degree_sum * norm

    return sum(map(community_contribution, communities))
Example #14
def main():
  """Run CNM community detection and summarise categories per community.

  Steps, in order:
    1. Read the node table from ``../data/cmty_nodes.csv`` or, failing
       that, ``../data/nodes.csv``; stop if neither exists.
    2. Load the graph from ``../data/youtube.graph`` or build it from
       ``../data/edges.csv`` with ``data2dag``.
    3. Print every self loop found.
    4. Run ``snap.CommunityCNM`` on an undirected copy and time it.
    5. Store each node's community in the ``cnm_cmty`` column and write the
       table to ``../data/cmty_nodes.csv``.
    6. Check the communities against a networkx graph built from the edge
       list and compute their modularity.
    7. Plot the per-community category proportions, save the figure, and
       print the top category of every community.
  """

  # Load data
  if path.exists("../data/cmty_nodes.csv"):
    node_upload = "../data/cmty_nodes.csv"
  elif path.exists("../data/nodes.csv"):
    node_upload = "../data/nodes.csv"
  else:
    # Nothing to analyse; stop here rather than fail on an undefined path.
    print("NO NODES TO UPLOAD!")
    return
  pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

  # Data in nice form
  headers = list(pd_nodes.columns)
  nodes = np.asarray(pd_nodes)

  # Load social network accordingly
  if path.exists("../data/youtube.graph"):
    FIn = snap.TFIn("../data/youtube.graph")
    social_network = snap.TNGraph.Load(FIn)
  else:
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    social_network = data2dag(edges, nodes.shape[0])

  # Check for self edges
  for e in social_network.Edges():
    if e.GetSrcNId() == e.GetDstNId():
      print("Self Loop Found:",e.GetSrcNId())

  # CNM Algorithm from snap.py
  print("Computing CNM")
  start = timeit.default_timer()
  CmtyV = snap.TCnComV()
  undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
  the_modularity = snap.CommunityCNM(undirected, CmtyV)
  stop = timeit.default_timer()

  # node id -> community index, plus the size of every community
  node_to_cmty = np.zeros(nodes.shape[0]).astype(int)
  cmty_sizes = np.zeros(len(CmtyV))
  for i in range(len(CmtyV)):
    for node in CmtyV[i]:
      node_to_cmty[node] = i
    cmty_sizes[i] = len(CmtyV[i])
  cmtys = [[node for node in cmty] for cmty in CmtyV]

  # Sum of snap's per-community modularity on the original graph.
  # NOTE(review): `m`, `the_modularity` and `modul` are computed but never
  # reported in this function -- confirm whether they should be printed.
  m = 0
  for i in range(len(CmtyV)):
    Nodes = snap.TIntV()
    for elem in CmtyV[i]:
      Nodes.Add(elem)
    m += snap.GetModularity(social_network, Nodes, social_network.GetEdges())

  # Undirected networkx view of the edge list, for the partition check
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_edges_from(list(map(tuple, edges)))

  # Add communities to nodes
  col_name = "cnm_cmty"
  pd_nodes[col_name] = node_to_cmty
  pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')

  assert(is_partition(G, cmtys))

  print("Calculating Modularity")
  modul = modularity(G, cmtys)
  print("Results from Clauset-Newman-Moore:")
  print("Number of clusters:",len(CmtyV))
  print("Time elapsed:",stop - start)

  # Fun category stuff to do
  upload_col = headers.index('category')
  categories = set()
  for i in range(nodes.shape[0]):
    categories.add(nodes[i][upload_col])
  idx_to_categories = list(categories)
  print("Number of categories:",len(idx_to_categories))
  categories_to_idx = {category: i for i, category in enumerate(idx_to_categories)}

  # Communities and categories
  cmty_category_count = np.zeros((len(CmtyV),len(idx_to_categories)))
  for i in range(nodes.shape[0]):
    cmty_category_count[int(node_to_cmty[i]),categories_to_idx[nodes[i][upload_col]]] += 1
  # Turn the counts into per-community proportions
  cmty_category_count = cmty_category_count/cmty_sizes[:,np.newaxis]

  # Create graphs per category
  for i in range(len(idx_to_categories)):
    # Skip missing ("nan") and " UNA " categories
    if (str(idx_to_categories[i]) != "nan") and (idx_to_categories[i] != " UNA "):
      plt.plot(sorted(cmty_category_count[:,i], reverse=True), label=idx_to_categories[i])
  plt.title("Category Proportions in Clusters")
  plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
  plt.savefig("../figures/category_proportions_clusters.png", bbox_inches="tight")
  for i in range(cmty_category_count.shape[0]):
    top_category = np.argmax(cmty_category_count[i])
    print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])

Example #15
Example #16
def silhouettes(G, particion, silencioso=False):
    """
    Compute the silhouette value of every node of graph 'G' for the
    partition 'particion', given as a list of lists. The value is
        s(i) = (b(i) - a(i)) / max(a(i), b(i))
    where a(i) is the mean distance from i to the nodes of its own cluster
    (as implemented here the average runs over the whole cluster, i
    included) and b(i) is the smallest of the mean distances to each of the
    clusters i does not belong to. Writing c_i for the cluster of i and
    Q = particion - c_i for the clusters that do not contain i:
        b(i) = min{mean{d(i,j) : j in cluster} : cluster in Q}
    b(i) is also called the "mean distance to the nearest cluster".

    Parameters
    ----------
    G : nx.Graph
    particion : list
        list of lists. Each sublist is a cluster whose elements are the
        names of the nodes belonging to that cluster.
    silencioso : bool
        When True, the explanatory messages printed before returning an
        empty list are suppressed.

    Returns
    -------
    output : list
        list of lists. Each sublist is a cluster whose elements are the
        silhouette values of its nodes, preserving the input order. An
        empty list is returned when a distance to another cluster is
        missing or when there is no other cluster.

    Raises
    ------
    NotAPartition
        If `is_partition` rejects `particion` for `G`.
    """
    if not is_partition(G, particion):
        raise NotAPartition(G, particion)

    ds = list(nx.all_pairs_shortest_path_length(G))

    def d(i, j):
        # ds[i][1][j] is the shortest-path length from the i-th node
        # (position in ds) to the node named j.
        return ds[i][1][j]

    nc = len(particion)
    # List of lists with the same lengths as 'particion'. The empty lists
    # are placeholders that the silhouette values replace below.
    s_values = [[[] for _ in range(len(cluster))] for cluster in particion]
    nodos_to_indices = crear_nodos_to_indices(particion)
    # Walk the nodes in the global order that the distance function 'd'
    # is indexed by.
    for i, nodo in enumerate(G.nodes()):
        m, pos = nodos_to_indices[nodo]
        cluster_actual = particion[m]
        otros_clusters = (particion[l] for l in range(nc) if l != m)
        a = np.average([d(i, j) for j in cluster_actual])
        try:
            # No self-exclusion filter here: the other clusters never
            # contain the current node, and comparing a node name with the
            # positional index i could drop an unrelated node.
            dists_interclusters = [np.average([d(i, j) for j in cluster])
                                   for cluster in otros_clusters]
        except KeyError:
            # A missing distance entry means some node is unreachable.
            if not silencioso:
                print(
                    'El grafo no es conexo y la distancia entre algunos clusters',
                    'es infinita por lo que no se puede realizar por completo el',
                    'análisis de silhouettes. Devolviendo lista vacía.')
            return []
        try:
            b = min(dists_interclusters)
        except ValueError:
            # min() of an empty list: there is no other cluster.
            if not silencioso:
                print(
                    'La partición tiene un solo elemento. Devolviendo lista vacía.'
                )
            return []
        s_values[m][pos] = (b - a) / max(a, b)
    return s_values
Example #18
