Example #1
0
 def new_func(*args, **kw):
     # Only the first two positional arguments are inspected; they are
     # assumed to be the graph and the candidate partition, in that order.
     if is_partition(*args[:2]):
         # Valid partition: hand every argument through untouched.
         return func(*args, **kw)
     raise nx.NetworkXError(
         "`partition` is not a valid partition of the nodes of G"
     )
Example #2
0
def _require_partition(G, partition):
    """Return ``(G, partition)`` unchanged when `partition` is valid.

    The pair is checked with ``is_partition``; if the check fails,
    :exc:`networkx.NetworkXError` is raised instead of returning.

    NOTE(review): this function is a plain validator, not a decorator
    itself.  Presumably it backs the ``require_partition`` decorator shown
    in the example below (the wiring is not visible here) -- confirm.  That
    decorator is meant for functions whose first two arguments are a graph
    and a partition of the nodes of that graph (in that order)::

        >>> @require_partition
        ... def foo(G, partition):
        ...     print("partition is valid!")
        ...
        >>> G = nx.complete_graph(5)
        >>> partition = [{0, 1}, {2, 3}, {4}]
        >>> foo(G, partition)
        partition is valid!
        >>> partition = [{0}, {2, 3}, {4}]
        >>> foo(G, partition)
        Traceback (most recent call last):
          ...
        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
        >>> partition = [{0, 1}, {1, 2, 3}, {4}]
        >>> foo(G, partition)
        Traceback (most recent call last):
          ...
        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G

    """
    # Guard clause: reject first, so the success path is the fall-through.
    if not is_partition(G, partition):
        raise nx.NetworkXError(
            "`partition` is not a valid partition of the nodes of G")
    return G, partition
Example #3
0
def kernighan_lin_bisection(G, partition=None, max_iter=100, weight='weight'):
    """Bisect `G` with Kernighan-Lin passes, saving a plot after each pass.

    Parameters
    ----------
    G : graph

    partition : pair of iterables, optional
        Initial bisection.  When omitted, the nodes are shuffled with the
        global ``random`` module and split at ``len(nodes) // div``.

    max_iter : int
        Upper bound on the number of improvement passes.

    weight : key
        Forwarded unchanged to ``_kernighan_lin_pass``.

    Returns
    -------
    partition : tuple
        The pair of sets ``(A, B)`` after the last pass.

    Raises
    ------
    ValueError
        If `partition` cannot be read as two iterables of nodes.
    NetworkXError
        If the two sets are not a partition of the nodes of `G`.

    Notes
    -----
    After every improving pass a figure is drawn and written to a
    hard-coded path.  This relies on names that are not defined in this
    function (``g``, ``p``, ``input_nodes``, ``aa``, ``tStart``, ``np``,
    ``plt``, ``time``); they must exist at module level.

    NOTE(review): ``return A, B`` sits inside the ``for div`` loop, so only
    the first ratio (``div == 2``) is ever processed -- confirm whether the
    remaining ratios were meant to run.
    """
    # If no partition is provided, split the nodes randomly into a
    # balanced partition.
    for div in range(2, 8):
        if partition is None:
            nodes = list(G)
            random.shuffle(nodes)
            h = len(nodes) // div
            partition = (nodes[:h], nodes[h:])
        # Make a copy of the partition as a pair of sets.  Only the errors
        # that malformed input can produce are translated; a bare ``except``
        # would also convert KeyboardInterrupt/SystemExit into ValueError.
        try:
            A, B = set(partition[0]), set(partition[1])
        except (TypeError, IndexError, KeyError) as err:
            raise ValueError('partition must be two sets') from err
        if not is_partition(G, (A, B)):
            raise nx.NetworkXError('partition invalid')
        for i in range(max_iter):
            # `gains` is a list of triples of the form (g, u, v) for each
            # node pair (u, v), where `g` is the gain of that node pair.
            gains = _kernighan_lin_pass(G, A, B, weight)
            csum = list(nx.utils.accumulate(g for g, u, v in gains))
            max_cgain = max(csum)
            if max_cgain <= 0:
                break
            # Get the node pairs up to the index of the maximum cumulative
            # gain, and collect each `u` into `anodes` and each `v` into
            # `bnodes`, for each pair `(u, v)`.
            index = csum.index(max_cgain)
            nodesets = islice(zip(*gains[:index + 1]), 1, 3)
            anodes, bnodes = (set(s) for s in nodesets)
            A |= bnodes
            A -= anodes
            B |= anodes
            B -= bnodes
            print(str(i) + '/' + str((max_iter)))
            # Colour vector for the plot: entries whose node is currently
            # in A get the value `aa`, all others stay 0.
            color = np.zeros(len(input_nodes))
            for q in range(len(np.array(list(A)))):
                color[np.where(input_nodes == np.array(list(A))[q])] = aa
            nx.draw_networkx(g,
                             with_labels=True,
                             node_color=color,
                             pos=p,
                             node_size=100,
                             font_size=3,
                             font_color='w')
            tEnd = time.time()
            plt.title('ratio:' + str(div) + '    epoch:' + str(i) +
                      '     time:' + str(int(tEnd - tStart)))
            plt.savefig(
                'H:/master/code/python/networkScience/week10/pic_kl/{:03d}{}.png'
                .format(i, aa),
                format='png')
            # Clear the figure so the next pass starts from a blank canvas.
            plt.clf()
            # plt.show()
        return A, B
Example #4
0
def test_generator():
    """Check that an LFR benchmark graph's communities partition its nodes."""
    n = 250
    tau1 = 3
    tau2 = 1.5
    mu = 0.1
    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                            min_community=20, seed=10)
    assert_equal(len(G), 250)
    # `Graph.node` was removed in NetworkX 2.4; `Graph.nodes` is the
    # supported accessor for node attribute dictionaries.
    C = {frozenset(G.nodes[v]['community']) for v in G}
    assert_true(is_partition(G.nodes(), C))
Example #5
0
def test_generator():
    """LFR benchmark smoke test: 250 nodes whose communities form a partition."""
    n, tau1, tau2, mu = 250, 3, 1.5, 0.1
    G = LFR_benchmark_graph(
        n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
    )
    assert_equal(len(G), 250)
    # Each node stores its whole community; freezing them lets identical
    # communities collapse into a single set element.
    communities = set()
    for v in G:
        communities.add(frozenset(G.nodes[v]['community']))
    assert_true(is_partition(G.nodes(), communities))
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
    """Split the nodes of `G` into two sets using repeated sweeps.

    Parameters
    ----------
    G : graph

    partition : pair of node collections, optional
        Initial bisection.  When omitted, the shuffled node order is cut in
        two to obtain the starting sides.

    max_iter : int
        Maximum number of sweeps to run.

    weight : key
        Edge attribute read as the edge weight; an edge lacking it counts
        as 1.

    seed : object providing ``shuffle``
        Used to shuffle the node order.
        NOTE(review): the default is None although ``seed.shuffle`` is
        called unconditionally -- presumably a decorator outside this block
        converts None/ints into a random state; confirm.

    Returns
    -------
    partition : tuple
        A pair of sets of nodes ``(A, B)``.

    Raises
    ------
    NetworkXError
        If `partition` does not unpack into two items, or is not a valid
        partition of the nodes of `G`.
    """
    labels = list(G)
    seed.shuffle(labels)
    n = len(G)
    # Position of every node in the shuffled order; `side` and `edges`
    # below are both indexed by this position.
    index = dict(zip(labels, range(len(labels))))

    if partition is not None:
        try:
            A, B = partition
        except (TypeError, ValueError) as e:
            raise nx.NetworkXError("partition must be two sets") from e
        if not is_partition(G, (A, B)):
            raise nx.NetworkXError("partition invalid")
        side = [0] * n
        for a in A:
            side[index[a]] = 1
    else:
        side = [0] * (n // 2) + [1] * ((n + 1) // 2)

    def _parallel_total(data):
        # Multigraph adjacency: one attribute dict per parallel edge.
        return sum(e.get(weight, 1) for e in data.values())

    def _single(data):
        return data.get(weight, 1)

    edge_weight = _parallel_total if G.is_multigraph() else _single
    edges = []
    for v in labels:
        edges.append([(index[u], edge_weight(d)) for u, d in G[v].items()])

    for _sweep in range(max_iter):
        costs = list(_kernighan_lin_sweep(edges, side))
        min_cost, min_i, _pair = min(costs)
        if min_cost >= 0:
            # No prefix of this sweep lowers the cost: stop.
            break

        for _cost, _step, (u, v) in costs[: min_i + 1]:
            side[u] = 1
            side[v] = 0

    A, B = set(), set()
    for node, s in zip(labels, side):
        if s == 0:
            A.add(node)
        elif s == 1:
            B.add(node)
    return A, B
Example #7
0
def modularity(G, communities, weight='weight'):
    r"""Compute the modularity of a partition of the nodes of `G`.

    Modularity is defined in [1]_ as

    .. math::

        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
            \delta(c_i,c_j)

    where $m$ is the number of edges, $A$ is the adjacency matrix of
    `G`, $k_i$ is the degree of $i$ and $\delta(c_i, c_j)$
    is 1 if $i$ and $j$ are in the same community and 0 otherwise.

    Parameters
    ----------
    G : NetworkX Graph

    communities : list
        List of sets of nodes of `G` representing a partition of the
        nodes.

    weight : key, optional (default='weight')
        Edge attribute used for edge weights, degrees and the graph size;
        an edge lacking it counts as 1.

    Returns
    -------
    Q : float
        The modularity of the partition.

    Raises
    ------
    NotAPartition
        If `communities` is not a partition of the nodes of `G`.

    Examples
    --------
    >>> G = nx.barbell_graph(3, 0)
    >>> nx.algorithms.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
    0.35714285714285704

    References
    ----------
    .. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
       Oxford University Press, 2011.

    """
    if not is_partition(G, communities):
        raise NotAPartition(G, communities)

    is_multi = G.is_multigraph()
    is_directed = G.is_directed()
    m = G.size(weight=weight)
    if is_directed:
        out_degree = dict(G.out_degree(weight=weight))
        in_degree = dict(G.in_degree(weight=weight))
        norm = 1 / m
    else:
        # Undirected: a single degree table serves both roles.
        out_degree = dict(G.degree(weight=weight))
        in_degree = out_degree
        norm = 1 / (2 * m)

    def val(u, v):
        # Weight of the (u, v) connection; 0 when the nodes are not adjacent.
        try:
            data = G[u][v]
            if is_multi:
                w = sum(d.get(weight, 1) for d in data.values())
            else:
                w = data.get(weight, 1)
        except KeyError:
            w = 0
        # Double count self-loops if the graph is undirected.
        if u == v and not is_directed:
            w *= 2
        return w - in_degree[u] * out_degree[v] * norm

    def pair_terms():
        # One term per ordered pair of nodes sharing a community.
        for block in communities:
            for u, v in product(block, repeat=2):
                yield val(u, v)

    return sum(pair_terms()) * norm
Example #8
0
def main():
    """Attach per-model community labels to the node table and score them.

    Reads the node table (preferring an existing ``cmty_nodes.csv``), loads
    one embedding file per model, assigns every node a community, rewrites
    ``cmty_nodes.csv`` with one label column per model, and finally prints
    the modularity, cluster count and clustering time of each model on the
    graph built from ``edges.csv``.
    """
    # Column name
    col_name = "ALGORITHM_cmty"

    # Load data: first existing candidate wins.
    for candidate in ("../data/cmty_nodes.csv", "../data/nodes.csv"):
        if path.exists(candidate):
            node_upload = candidate
            break
    else:
        print("NO NODES TO UPLOAD!")
        assert False
    pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

    # Data in nice form
    headers = list(pd_nodes.columns)
    nodes = np.asarray(pd_nodes)

    model_names = ["GAT", "GCN", "GraphSage"]

    model_cmtys, model_time = [], []
    for model in model_names:

        # Load embeddings
        embeddings = np.load("../data/" + model + "_node_embeddings.npy")
        print(embeddings.shape)

        # Generate node_mapping for clusters
        start = timeit.default_timer()
        ##########################################
        # CODE HERE to cluster embeddings and creating node_mapping #
        # node_mapping can either be dictionary or array #
        ##########################################

        # Placeholder clustering: every node is mapped to community 0.
        node_mapping = np.zeros(len(nodes)).astype(int)

        ##########################################
        model_time.append(timeit.default_timer() - start)

        # Convert node_mapping to cmtys and node_to_cmty array
        cmtys = [[] for _ in range(len(set(node_mapping)))]
        node_to_cmty = np.zeros(len(node_mapping)).astype(int)
        for j, label in enumerate(node_mapping):
            node_to_cmty[j] = label
            cmtys[label].append(j)
        model_cmtys.append(cmtys)

        # Add communities to nodes; the file is rewritten after every model.
        pd_nodes[model + "_" + col_name] = node_to_cmty
        pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')

    print("Creating Graph")
    # Load social network accordingly
    edge_rows = np.asarray(
        pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    ).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from([tuple(row) for row in edge_rows])

    print("Calculating modularity")

    for model, cmtys, elapsed in zip(model_names, model_cmtys, model_time):
        assert is_partition(G, cmtys)
        modul = modularity(G, cmtys)

        print("Results from " + model + " ALGORITHM:")
        print("Modularity:", modul)
        print("Number of clusters:", len(cmtys))
        print("Time elapsed:", elapsed)
Example #9
0
def kernighan_lin_bisection(G,
                            partition=None,
                            max_iter=10,
                            weight='weight',
                            seed=None):
    """Partition a graph into two blocks using the Kernighan–Lin
    algorithm.

    This algorithm partitions a network into two sets by iteratively
    swapping pairs of nodes to reduce the edge cut between the two sets.  The
    pairs are chosen according to a modified form of Kernighan-Lin, which
    moves nodes individually, alternating between sides to keep the bisection
    balanced.

    Parameters
    ----------
    G : graph

    partition : tuple
        Pair of iterables containing an initial partition. If not
        specified, the shuffled node order is split in half.

    max_iter : int
        Maximum number of times to attempt swaps to find an
        improvement before giving up.

    weight : key
        Edge data key to use as weight. Edges without this key have
        weight one.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        The node order is shuffled with it before the sweeps.
        NOTE(review): the body calls ``seed.shuffle`` directly, so an RNG
        object is required here -- presumably a decorator outside this
        block converts None/ints; confirm.

    Returns
    -------
    partition : tuple
        A pair of sets of nodes representing the bipartition.

    Raises
    ------
    NetworkXError
        If partition is not a valid partition of the nodes of the graph.

    References
    ----------
    .. [1] Kernighan, B. W.; Lin, Shen (1970).
       "An efficient heuristic procedure for partitioning graphs."
       *Bell Systems Technical Journal* 49: 291--307.
       Oxford University Press 2011.

    """
    n = len(G)
    labels = list(G)
    seed.shuffle(labels)
    # Position of each node in the shuffled order; `side` and `edges` are
    # both indexed by this position, not by the node label.
    index = {v: i for i, v in enumerate(labels)}

    if partition is None:
        side = [0] * (n // 2) + [1] * ((n + 1) // 2)
    else:
        try:
            A, B = partition
        except (TypeError, ValueError) as err:
            raise nx.NetworkXError('partition must be two sets') from err
        if not is_partition(G, (A, B)):
            raise nx.NetworkXError('partition invalid')
        side = [0] * n
        for a in A:
            # Translate the node to its shuffled position before marking
            # it; indexing `side` with the raw label marks the wrong slot
            # (or fails outright for non-integer / out-of-range labels).
            side[index[a]] = 1

    if G.is_multigraph():
        # Parallel edges are collapsed into one entry with summed weight.
        edges = [[(index[u], sum(e.get(weight, 1) for e in d.values()))
                  for u, d in G[v].items()] for v in labels]
    else:
        edges = [[(index[u], e.get(weight, 1)) for u, e in G[v].items()]
                 for v in labels]

    for _ in range(max_iter):
        costs = list(_kernighan_lin_sweep(edges, side))
        min_cost, min_i, _ = min(costs)
        if min_cost >= 0:
            # The best prefix of this sweep does not lower the cost.
            break

        for _, _, (u, v) in costs[:min_i + 1]:
            side[u] = 1
            side[v] = 0

    A = {u for u, s in zip(labels, side) if s == 0}
    B = {u for u, s in zip(labels, side) if s == 1}
    return A, B
Example #10
0
 def new_func(*args, **kw):
     # The wrapped callable is expected to take the graph and the partition
     # as its first two positional arguments; only those two are validated.
     valid = is_partition(*args[:2])
     if not valid:
         raise nx.NetworkXError(
             '`partition` is not a valid partition of the nodes of G')
     return func(*args, **kw)
Example #11
0
def modularity(G, communities, weight='weight'):
    r"""Return the modularity of `communities`, a partition of `G`.

    Modularity is defined in [1]_ as

    .. math::

        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
            \delta(c_i,c_j)

    where *m* is the number of edges, *A* is the adjacency matrix of
    `G`, :math:`k_i` is the degree of *i* and :math:`\delta(c_i, c_j)`
    is 1 if *i* and *j* are in the same community and 0 otherwise.

    Parameters
    ----------
    G : NetworkX Graph

    communities : list
        List of sets of nodes of `G` representing a partition of the
        nodes.

    weight : key, optional (default='weight')
        Edge attribute used for edge weights, degrees and the graph size;
        an edge lacking it counts as 1.

    Returns
    -------
    Q : float
        The modularity of the partition.

    Raises
    ------
    NotAPartition
        If `communities` is not a partition of the nodes of `G`.

    Examples
    --------
    >>> G = nx.barbell_graph(3, 0)
    >>> nx.algorithms.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
    0.35714285714285704

    References
    ----------
    .. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
       Oxford University Press, 2011.

    """
    if not is_partition(G, communities):
        raise NotAPartition(G, communities)

    has_parallel_edges = G.is_multigraph()
    has_direction = G.is_directed()
    m = G.size(weight=weight)
    if has_direction:
        out_degree = dict(G.out_degree(weight=weight))
        in_degree = dict(G.in_degree(weight=weight))
        norm = 1 / m
    else:
        # One degree table is shared for both lookups.
        out_degree = dict(G.degree(weight=weight))
        in_degree = out_degree
        norm = 1 / (2 * m)

    def val(u, v):
        try:
            if has_parallel_edges:
                # Add up the weights of all parallel edges between u and v.
                w = sum(attrs.get(weight, 1) for attrs in G[u][v].values())
            else:
                w = G[u][v].get(weight, 1)
        except KeyError:
            # u and v are not adjacent.
            w = 0
        # Double count self-loops if the graph is undirected.
        if u == v and not has_direction:
            w *= 2
        return w - in_degree[u] * out_degree[v] * norm

    # One term per ordered node pair inside each community, in order.
    terms = []
    for community in communities:
        for u, v in product(community, repeat=2):
            terms.append(val(u, v))
    Q = sum(terms)
    return Q * norm
Example #12
0
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight='weight',
                            seed=None):
    """Partition a graph into two blocks using the Kernighan–Lin
    algorithm.

    This algorithm partitions a network into two sets by iteratively
    swapping pairs of nodes to reduce the edge cut between the two sets.

    Parameters
    ----------
    G : graph

    partition : tuple
        Pair of iterables containing an initial partition. If not
        specified, a random balanced partition is used.

    max_iter : int
        Maximum number of times to attempt swaps to find an
        improvement before giving up.

    weight : key
        Edge data key to use as weight; forwarded to the pass helper.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Only used if partition is None.
        NOTE(review): the body calls ``seed.shuffle`` directly, so an RNG
        object is required here -- presumably a decorator outside this
        block converts None/ints; confirm.

    Returns
    -------
    partition : tuple
        A pair of sets of nodes representing the bipartition.

    Raises
    ------
    ValueError
        If partition cannot be read as two iterables of nodes.
    NetworkXError
        If partition is not a valid partition of the nodes of the graph.

    References
    ----------
    .. [1] Kernighan, B. W.; Lin, Shen (1970).
       "An efficient heuristic procedure for partitioning graphs."
       *Bell Systems Technical Journal* 49: 291--307.
       Oxford University Press 2011.

    """
    # If no partition is provided, split the nodes randomly into a
    # balanced partition.
    if partition is None:
        nodes = list(G)
        seed.shuffle(nodes)
        h = len(nodes) // 2
        partition = (nodes[:h], nodes[h:])
    # Make a copy of the partition as a pair of sets.  Only the errors that
    # malformed input produces are translated; a bare ``except`` would also
    # turn KeyboardInterrupt/SystemExit into ValueError.
    try:
        A, B = set(partition[0]), set(partition[1])
    except (TypeError, IndexError, KeyError) as err:
        raise ValueError('partition must be two sets') from err
    if not is_partition(G, (A, B)):
        raise nx.NetworkXError('partition invalid')
    for _ in range(max_iter):
        # `gains` is a list of triples of the form (g, u, v) for each
        # node pair (u, v), where `g` is the gain of that node pair.
        gains = _kernighan_lin_pass(G, A, B, weight)
        csum = list(nx.utils.accumulate(g for g, u, v in gains))
        max_cgain = max(csum)
        if max_cgain <= 0:
            # No prefix of swaps improves the cut: stop.
            break
        # Get the node pairs up to the index of the maximum cumulative
        # gain, and collect each `u` into `anodes` and each `v` into
        # `bnodes`, for each pair `(u, v)`.
        index = csum.index(max_cgain)
        nodesets = islice(zip(*gains[:index + 1]), 1, 3)
        anodes, bnodes = (set(s) for s in nodesets)
        A |= bnodes
        A -= anodes
        B |= anodes
        B -= bnodes
    return A, B
Example #13
0
def modularity(G, communities, weight="weight", resolution=1):
    r"""Compute the modularity of a partition of the nodes of `G`.

    Modularity is defined in [1]_ as

    .. math::
        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
            \delta(c_i,c_j)

    where $m$ is the number of edges, $A$ is the adjacency matrix of `G`,
    $k_i$ is the degree of $i$, $\gamma$ is the resolution parameter,
    and $\delta(c_i, c_j)$ is 1 if $i$ and $j$ are in the same community else 0.

    According to [2]_ (and verified by some algebra) this can be reduced to

    .. math::
       Q = \sum_{c=1}^{n}
       \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]

    where the sum iterates over all communities $c$, $m$ is the number of edges,
    $L_c$ is the number of intra-community links for community $c$,
    $k_c$ is the sum of degrees of the nodes in community $c$,
    and $\gamma$ is the resolution parameter.

    The resolution parameter sets an arbitrary tradeoff between intra-group
    edges and inter-group edges. More complex grouping patterns can be
    discovered by analyzing the same network with multiple values of gamma
    and then combining the results [3]_. That said, it is very common to
    simply use gamma=1. More on the choice of gamma is in [4]_.

    The second formula is the one this function evaluates, one term per
    community. For directed graphs the second formula replaces $k_c$ with
    $k^{in}_c k^{out}_c$.

    Parameters
    ----------
    G : NetworkX Graph

    communities : list or iterable of set of nodes
        These node sets must represent a partition of G's nodes.
        A non-list iterable is materialised into a list first.

    weight : string or None, optional (default="weight")
        The edge attribute that holds the numerical value used
        as a weight. If None or an edge does not have that attribute,
        then that edge has weight 1.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Returns
    -------
    Q : float
        The modularity of the partition.

    Raises
    ------
    NotAPartition
        If `communities` is not a partition of the nodes of `G`.

    Examples
    --------
    >>> import networkx.algorithms.community as nx_comm
    >>> G = nx.barbell_graph(3, 0)
    >>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
    0.35714285714285715
    >>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
    0.35714285714285715

    References
    ----------
    .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
       Oxford University Press, 2011.
    .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
       "Finding community structure in very large networks."
       Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
       Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
    .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
       maximum likelihood methods for community detection"
       Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315

    """
    # The communities are traversed twice (validation, then scoring), so a
    # one-shot iterable has to be materialised first.
    communities = (
        communities if isinstance(communities, list) else list(communities)
    )
    if not is_partition(G, communities):
        raise NotAPartition(G, communities)

    is_directed = G.is_directed()
    if is_directed:
        out_deg = dict(G.out_degree(weight=weight))
        in_deg = dict(G.in_degree(weight=weight))
        m = sum(out_deg.values())
        norm = 1 / m**2
    else:
        out_deg = in_deg = dict(G.degree(weight=weight))
        total_degree = sum(out_deg.values())
        m = total_degree / 2
        norm = 1 / total_degree**2

    def contribution(members):
        # Term of the second formula for a single community.
        nodes = set(members)
        intra = sum(
            wt
            for _u, v, wt in G.edges(nodes, data=weight, default=1)
            if v in nodes
        )

        k_out = sum(out_deg[u] for u in nodes)
        if is_directed:
            k_in = sum(in_deg[u] for u in nodes)
        else:
            k_in = k_out

        return intra / m - resolution * k_out * k_in * norm

    return sum(contribution(c) for c in communities)
Example #14
0
def main():
  """Run snap's CNM community detection and report NetworkX modularity.

  Loads the node table, obtains the social network (a saved snap graph if
  present, otherwise built from ``edges.csv``), runs ``snap.CommunityCNM``
  on an undirected copy without self edges, writes the resulting community
  label of every node to ``cmty_nodes.csv`` and prints the modularity,
  number of clusters and CNM running time.
  """

  # Load data: an existing cmty_nodes.csv takes precedence over nodes.csv.
  if path.exists("../data/cmty_nodes.csv"):
    node_upload = "../data/cmty_nodes.csv"
  elif path.exists("../data/nodes.csv"):
    node_upload = "../data/nodes.csv"
  else:
    print("NO NODES TO UPLOAD!")
    # NOTE(review): asserts are stripped under `python -O`; execution would
    # then continue and fail on the unbound `node_upload` below.
    assert(False)
  pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

  # Data in nice form
  # `headers` is only referenced by the string-quoted code further down.
  headers = list(pd_nodes.columns)
  nodes = np.asarray(pd_nodes)

  # Load social network accordingly: reuse the saved snap graph when it
  # exists, otherwise build one from the edge list.
  if path.exists("../data/youtube.graph"):
    FIn = snap.TFIn("../data/youtube.graph")
    social_network = snap.TNGraph.Load(FIn)
  else:
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    # `data2dag` is defined outside this function; presumably it builds a
    # snap graph from the edge array and node count -- TODO confirm.
    social_network = data2dag(edges, nodes.shape[0])

  # Check for self edges (reported only; they are removed from the
  # undirected copy below, not from `social_network`).
  for e in social_network.Edges():
    if e.GetSrcNId() == e.GetDstNId():
      print("Self Loop Found:",e.GetSrcNId())

  # CNM Algorithm from snap.py
  print("Computing CNM")
  start = timeit.default_timer()
  CmtyV = snap.TCnComV()
  undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
  snap.DelSelfEdges(undirected)
  # The value returned by snap is kept but not used below; the modularity
  # that gets printed is recomputed with `modularity(G, cmtys)`.
  the_modularity = snap.CommunityCNM(undirected, CmtyV)
  stop = timeit.default_timer()
  # Flatten the snap community vector: node id -> community index, plus
  # the size of every community.
  node_to_cmty = np.zeros(nodes.shape[0]).astype(int)
  cmty_sizes = np.zeros(len(CmtyV))
  for i in range(len(CmtyV)):
    for node in CmtyV[i]:
      node_to_cmty[node] = i
    cmty_sizes[i] = len(CmtyV[i])
  # Plain list-of-lists copy of the communities for the NetworkX helpers.
  cmtys = [[node for node in cmty] for cmty in CmtyV]
  # The string below is disabled code kept as a bare string statement.
  '''
  m = 0
  for i in range(len(CmtyV)):
    Nodes = snap.TIntV()
    for elem in CmtyV[i]:
      Nodes.Add(int(elem))
    m += snap.GetModularity(social_network, Nodes, social_network.GetEdges())
  '''
  # Rebuild the network as an undirected NetworkX graph; edges.csv is read
  # here regardless of which branch loaded `social_network` above.
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_nodes_from(range(nodes.shape[0]))
  G.add_edges_from(list(map(tuple, edges)))

  # Add communities to nodes
  col_name = "cnm_cmty"
  pd_nodes[col_name] = node_to_cmty
  pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')


  # NOTE(review): this check disappears under `python -O`.
  assert(is_partition(G, cmtys))

  print("Calculating Modularity")
  modul = modularity(G, cmtys)
  print("Results from Clauset-Newman-Moore:")
  print("Modularity:",modul)
  print("Number of clusters:",len(CmtyV))
  print("Time elapsed:",stop - start)


  # Fun category stuff to do
  # (everything below is disabled code kept as bare string statements)
  '''
  upload_col = headers.index('category')
  categories = set()
  for i in range(nodes.shape[0]):
    categories.add(nodes[i][upload_col])
  idx_to_categories = list(categories)
  print("Number of categories:",len(idx_to_categories))
  categories_to_idx = dict()
  for i in range(len(idx_to_categories)):
    categories_to_idx[idx_to_categories[i]] = i

  # Communities and categories
  cmty_category_count = np.zeros((len(CmtyV),len(idx_to_categories)))
  for i in range(nodes.shape[0]):
    cmty_category_count[int(node_to_cmty[i]),categories_to_idx[nodes[i][upload_col]]] += 1
  cmty_category_count = cmty_category_count/cmty_sizes[:,np.newaxis]
  '''


  # Create graphs per category
  '''
  plt.figure()
  for i in range(len(idx_to_categories)):
    if (str(idx_to_categories[i]) != "nan") and (idx_to_categories[i] != " UNA "):
      plt.plot(sorted(cmty_category_count[:,i], reverse=True), label=idx_to_categories[i])
  plt.title("Category Proportions in Clusters")
  plt.xlabel("Cluster")
  plt.ylabel("Proportion")
  plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
  plt.savefig("../figures/category_proportions_clusters.png", bbox_inches="tight")
  '''
  '''
  for i in range(cmty_category_count.shape[0]):
    top_category = np.argmax(cmty_category_count[i])
    print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
  '''





  '''
Example #15
0
def kernighan_lin_bisection(G,
                            partition=None,
                            max_iter=10,
                            weight='weight',
                            seed=None):
    """Partition a graph into two blocks using the Kernighan–Lin
    algorithm.

    This algorithm partitions a network into two sets by iteratively
    swapping pairs of nodes to reduce the edge cut between the two sets.

    Parameters
    ----------
    G : graph

    partition : tuple
        Pair of iterables containing an initial partition. If not
        specified, a random balanced partition is used.

    max_iter : int
        Maximum number of times to attempt swaps to find an
        improvement before giving up.

    weight : key
        Edge data key to use as weight; forwarded to the pass helper.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Only used if partition is None.
        NOTE(review): the body calls ``seed.shuffle`` directly, so an RNG
        object is required here -- presumably a decorator outside this
        block converts None/ints; confirm.

    Returns
    -------
    partition : tuple
        A pair of sets of nodes representing the bipartition.

    Raises
    ------
    ValueError
        If partition cannot be read as two iterables of nodes.
    NetworkXError
        If partition is not a valid partition of the nodes of the graph.

    References
    ----------
    .. [1] Kernighan, B. W.; Lin, Shen (1970).
       "An efficient heuristic procedure for partitioning graphs."
       *Bell Systems Technical Journal* 49: 291--307.
       Oxford University Press 2011.

    """
    # If no partition is provided, split the nodes randomly into a
    # balanced partition.
    if partition is None:
        nodes = list(G)
        seed.shuffle(nodes)
        h = len(nodes) // 2
        partition = (nodes[:h], nodes[h:])
    # Make a copy of the partition as a pair of sets.  The handler is
    # limited to the errors bad input raises, so that interrupts and
    # interpreter exits are no longer rewritten as ValueError.
    try:
        A, B = set(partition[0]), set(partition[1])
    except (TypeError, IndexError, KeyError) as err:
        raise ValueError('partition must be two sets') from err
    if not is_partition(G, (A, B)):
        raise nx.NetworkXError('partition invalid')
    for _ in range(max_iter):
        # `gains` is a list of triples of the form (g, u, v) for each
        # node pair (u, v), where `g` is the gain of that node pair.
        gains = _kernighan_lin_pass(G, A, B, weight)
        csum = list(accumulate(g for g, u, v in gains))
        max_cgain = max(csum)
        if max_cgain <= 0:
            # Even the best prefix of swaps does not improve the cut.
            break
        # Get the node pairs up to the index of the maximum cumulative
        # gain, and collect each `u` into `anodes` and each `v` into
        # `bnodes`, for each pair `(u, v)`.
        index = csum.index(max_cgain)
        nodesets = islice(zip(*gains[:index + 1]), 1, 3)
        anodes, bnodes = (set(s) for s in nodesets)
        A |= bnodes
        A -= anodes
        B |= anodes
        B -= bnodes
    return A, B
Example #16
0
def modularity(G, communities, weight="weight"):
    r"""Return the modularity of `communities`, a partition of `G`.

    Modularity is defined in [1]_ as

    .. math::

        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
            \delta(c_i,c_j)

    where $m$ is the number of edges, $A$ is the adjacency matrix of
    `G`, $k_i$ is the degree of $i$ and $\delta(c_i, c_j)$
    is 1 if $i$ and $j$ are in the same community and 0 otherwise.

    According to [2]_ (and verified by some algebra) this can be reduced to

    .. math::
       Q = \sum_{c=1}^{n}
       \left[ \frac{L_c}{m} - \left( \frac{k_c}{2m} \right) ^2 \right]

    where the sum iterates over all communities $c$, $m$ is the number of edges,
    $L_c$ is the number of intra-community links for community $c$,
    $k_c$ is the sum of degrees of the nodes in community $c$.

    The second formula is the one this function evaluates, one term per
    community.

    Parameters
    ----------
    G : NetworkX Graph

    communities : list or iterable of set of nodes
        These node sets must represent a partition of G's nodes.
        A non-list iterable is materialised into a list first.

    weight : string or None, optional (default="weight")
        The edge attribute that holds the numerical value used
        as a weight. If None or an edge does not have that attribute,
        then that edge has weight 1.

    Returns
    -------
    Q : float
        The modularity of the partition.

    Raises
    ------
    NotAPartition
        If `communities` is not a partition of the nodes of `G`.

    Examples
    --------
    >>> import networkx.algorithms.community as nx_comm
    >>> G = nx.barbell_graph(3, 0)
    >>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
    0.35714285714285715
    >>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
    0.35714285714285715

    References
    ----------
    .. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
       Oxford University Press, 2011.
    .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
       "Finding community structure in very large networks."
       Physical review E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
    """
    # Validation and scoring each walk the communities once, so anything
    # that is not already a list is copied into one.
    if not isinstance(communities, list):
        communities = list(communities)
    if not is_partition(G, communities):
        raise NotAPartition(G, communities)

    graph_is_directed = G.is_directed()
    if graph_is_directed:
        k_out = dict(G.out_degree(weight=weight))
        k_in = dict(G.in_degree(weight=weight))
        m = sum(k_out.values())
        norm = 1 / m**2
    else:
        # Undirected: in- and out-degree are the same table.
        k_out = k_in = dict(G.degree(weight=weight))
        degree_total = sum(k_out.values())
        m = degree_total / 2
        norm = 1 / degree_total**2

    def score(block):
        # Term of the second formula for one community.
        members = set(block)
        inside = sum(
            wt
            for _src, dst, wt in G.edges(members, data=weight, default=1)
            if dst in members
        )

        out_total = sum(k_out[u] for u in members)
        in_total = out_total
        if graph_is_directed:
            in_total = sum(k_in[u] for u in members)

        return inside / m - out_total * in_total * norm

    per_community = [score(block) for block in communities]
    return sum(per_community)
def silhouettes(G, particion, silencioso=False):
    """
    Compute the silhouette value of every node of graph 'G' for the
    partition 'particion', given as a list of lists. The value is

    s(i) = (b(i) - a(i)) / max(a(i), b(i))

    where a(i) is the mean distance from i to the nodes of its own cluster
    and b(i) is the smallest of the mean distances from i to each of the
    clusters i does not belong to. More precisely, let c_i be the cluster
    containing i and Q = particion - c_i the set of clusters that do not
    contain i. Then

    b(i) = min{mean{d(i, j) : j in cluster} : cluster in Q}

    b(i) is also known as the "mean distance to the nearest cluster".
    Distances are shortest-path lengths in 'G'.

    NOTE: the average for a(i) runs over every node of c_i, including i
    itself (which contributes a distance of 0).

    Parameters
    ----------
    G : nx.Graph
    particion : list
        List of lists. Each sublist is a cluster whose elements are the
        names of the nodes belonging to that cluster.
    silencioso : bool, optional
        If True, do not print the explanatory message when an empty list
        is returned.

    Returns
    -------
    output : list
        List of lists. Each sublist is a cluster whose elements are the
        silhouette values of its nodes, in the same order as the input.
        An empty list is returned when the graph is not connected (some
        required distance is undefined) or when the partition has a
        single cluster.

    Raises
    ------
    NotAPartition
        If `particion` is not a partition of the nodes of `G`.
    """
    if not is_partition(G, particion):
        raise NotAPartition(G, particion)

    # dist[u][v] is the shortest-path length from u to v. Unreachable
    # nodes are simply absent from dist[u], so looking them up raises
    # KeyError.
    dist = dict(nx.all_pairs_shortest_path_length(G))

    # Same shape as 'particion'; the empty lists are placeholders that are
    # replaced by the silhouette values below.
    s_values = [[[] for _ in cluster] for cluster in particion]
    nodos_to_indices = crear_nodos_to_indices(particion)

    for nodo in G.nodes():
        # (cluster index, position inside that cluster) of the node.
        idx_cluster, idx_nodo = nodos_to_indices[nodo]
        d_nodo = dist[nodo]
        try:
            a = np.average([d_nodo[j] for j in particion[idx_cluster]])
            # The partition was validated above, so 'nodo' never appears
            # in another cluster and no self-exclusion filter is needed.
            dists_interclusters = [
                np.average([d_nodo[j] for j in cluster])
                for k, cluster in enumerate(particion)
                if k != idx_cluster
            ]
        except KeyError:
            if not silencioso:
                print(
                    'El grafo no es conexo y la distancia entre algunos clusters',
                    'es infinita por lo que no se puede realizar por completo el',
                    'análisis de silhouettes. Devolviendo lista vacía.')
            return []
        if not dists_interclusters:
            # Only one cluster: there is no "nearest other cluster".
            if not silencioso:
                print(
                    'La partición tiene un solo elemento. Devolviendo lista vacía.'
                )
            return []
        b = min(dists_interclusters)
        s_values[idx_cluster][idx_nodo] = (b - a) / max(a, b)
    return s_values
Example #18
0
def main():

  # Load data
  if path.exists("../data/cmty_nodes.csv"):
    node_upload = "../data/cmty_nodes.csv"
  elif path.exists("../data/nodes.csv"):
    node_upload = "../data/cmty_nodes.csv"
  else:
    print("NO NODES TO UPLOAD!")
    assert(False)
  pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

  # Data in nice form
  headers = list(pd_nodes.columns)
  nodes = np.asarray(pd_nodes)

  # Load social network accordingly
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_nodes_from(range(nodes.shape[0]))
  G.add_edges_from(list(map(tuple, edges)))

  #first compute the best partition
  print("Computing Louvain Algorithm")
  start = timeit.default_timer()
  partition = community.best_partition(G)
  stop = timeit.default_timer()

  # Computing modularity
  num_cmtys = len(set(partition.values()))
  num_edges = edges.shape[0]
  cmtys = [[] for _ in range(num_cmtys)]
  node_to_cmty = np.zeros(len(partition)).astype(int)
  for i in range(len(node_to_cmty)):
    node_to_cmty[i] = partition[i]
    cmtys[partition[i]].append(i)

  # Load social network accordingly
  if path.exists("../data/youtube.graph"):
    FIn = snap.TFIn("../data/youtube.graph")
    social_network = snap.TNGraph.Load(FIn)
  else:
    social_network = data2dag(edges, nodes.shape[0])

  # Add communities to nodes
  col_name = "louvain_cmty"
  pd_nodes[col_name] = node_to_cmty
  pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')

  '''
  modularity = 0
  for cmty in cmtys:
    Nodes = snap.TIntV()
    for elem in cmty:
      Nodes.Add(int(elem))
    modularity += snap.GetModularity(social_network, Nodes, num_edges)
  '''
  print("Calculating Modularity")
  assert(is_partition(G, cmtys))
  modul = modularity(G, cmtys)
  print("Results from Louvain:")
  print("Modularity:",modul)
  print("Number of clusters:",num_cmtys)
  print("Time elapsed:",stop - start)


  #drawing
  '''