def renumber_communities(self):
    """Relabel the communities as 0, ..., q-1, with q the number of communities.

    Every label that occurs in the current membership receives a fresh
    consecutive number. Communities without any member never occur in the
    membership, so they get no new number and disappear as a side effect.
    """
    # The generator hands out consecutive integers, one per distinct old label.
    relabel = ig.UniqueIdGenerator()
    renumbered = []
    for old_label in self._membership:
        renumbered.append(relabel[old_label])
    self._membership = renumbered

    # Rebuild the internal administration for the new labelling.
    self.__init_admin()

    # The length of the object is the number of communities; refresh it.
    self._len = len(self.community)
# Example no. 2
# 0
    def Bipartite(cls,
                  graph,
                  resolution_parameter_01,
                  resolution_parameter_0=0,
                  resolution_parameter_1=0,
                  degree_as_node_size=False,
                  types='type',
                  **kwargs):
        """ Create three layers for bipartite partitions.

        This creates three layers for bipartite partition necessary for detecting
        communities in bipartite networks. These three layers should be passed to
        :func:`Optimiser.optimise_partition_multiplex` with
        ``layer_weights=[1,-1,-1]``.

        Parameters
        ----------
        graph : :class:`ig.Graph`
          Graph to define the bipartite partitions on.

        resolution_parameter_01 : double
          Resolution parameter for in between two classes.

        resolution_parameter_0 : double
          Resolution parameter for class 0.

        resolution_parameter_1 : double
          Resolution parameter for class 1.

        degree_as_node_size : boolean
          If ``True`` use degree as node size instead of 1, to mimic modularity,
          see `Notes <#notes-bipartite>`_.

        types : vertex attribute or list
          Indicator of the class for each vertex. If not 0, 1, it is automatically
          converted.

        **kwargs
          Additional arguments passed on to default constructor of
          :class:`CPMVertexPartition`. They are only used for the first layer
          (the one that carries the links).

        Returns
        -------
        tuple
          ``(partition_01, partition_0, partition_1)``: the layer with all nodes
          and all links, followed by the two link-free layers in which only the
          nodes of class 0, respectively class 1, have a non-zero node size.

        Raises
        ------
        TypeError
          If ``types`` is ``None``.

        ValueError
          If the vertices do not fall into exactly two classes, or if
          ``degree_as_node_size`` is ``True`` for a directed graph.

        .. _notes-bipartite:

        Notes
        -----

        For bipartite networks, we would like to be able to set three different
        resolution parameters: one for within each class :math:`\\gamma_0,
        \\gamma_1`, and one for the links between classes, :math:`\\gamma_{01}`.
        Then the formulation would be

        .. math:: Q = \\sum_{ij}
           [A_{ij}
            - (\\gamma_0\\delta(s_i,0) + \\gamma_1\\delta(s_i,1)) \\delta(s_i,s_j)
            - \\gamma_{01}(1 - \\delta(s_i, s_j))
           ]\\delta(\\sigma_i, \\sigma_j)

        In terms of communities this is

        .. math:: Q = \\sum_c (e_c
                              - \\gamma_{01} 2 n_c(0) n_c(1)
                              - \\gamma_0 n^2_c(0)
                              - \\gamma_1 n^2_c(1))

        where :math:`n_c(0)` is the number of nodes in community :math:`c` of class 0
        (and similarly for 1) and :math:`e_c` is the number of edges within community
        :math:`c`. We denote by :math:`n_c = n_c(0) + n_c(1)` the total number of nodes
        in community :math:`c`.

        We achieve this by creating three layers : (1) all nodes have ``node_size =
        1`` and all relevant links; (2) only nodes of class 0 have ``node_size =
        1`` and no links; (3) only nodes of class 1 have ``node_size = 1`` and no
        links. If we add the first with resolution parameter :math:`\\gamma_{01}`,
        and the others with resolution parameters :math:`\\gamma_{01} - \\gamma_0`
        and :math:`\\gamma_{01} - \\gamma_1`, but the latter two with a layer
        weight of -1 while the first layer has layer weight 1, we obtain the
        following:

        .. math:: Q &=  \\sum_c (e_c - \\gamma_{01} n_c^2)
                       -\\sum_c (- (\\gamma_{01} - \\gamma_0) n_c(0)^2)
                       -\\sum_c (- (\\gamma_{01} - \\gamma_1) n_c(1)^2) \\\\
                    &=  \\sum_c [e_c - \\gamma_{01} 2 n_c(0) n_c(1)
                                     - \\gamma_{01} n_c(0)^2
                                     - \\gamma_{01} n_c(1)^2
                                     + ( \\gamma_{01} - \\gamma_0) n_c(0)^2
                                     + ( \\gamma_{01} - \\gamma_1) n_c(1)^2
                               ] \\\\
                    &=  \\sum_c [e_c - \\gamma_{01} 2 n_c(0) n_c(1)
                                     - \\gamma_{0} n_c(0)^2
                                     - \\gamma_{1} n_c(1)^2]

        Although the derivation above is using :math:`n_c^2`, implicitly assuming a
        directed graph with self-loops, similar derivations can be made for
        undirected graphs using :math:`\\binom{n_c}{2}`, but the notation is then
        somewhat more convoluted.

        If we set node sizes equal to the degree, we get something similar to
        modularity, except that the resolution parameter should still be divided by
        :math:`2m`. In particular, in general (i.e. not specifically for bipartite
        graph) if ``node_sizes=G.degree()`` we then obtain

        .. math:: Q = \\sum_{ij} A_{ij} - \\gamma k_i k_j

        In the case of bipartite graphs something similar is obtained, but then
        correctly adapted (as long as the resolution parameter is also
        appropriately rescaled).

        .. note:: This function is not suited for directed graphs in the case of
                  using the degree as node sizes.
        """

        if types is None:
            # ``None`` cannot be turned into per-vertex classes; fail with a
            # clear message instead of an opaque error from ``set(None)``.
            raise TypeError(
                "types must be a vertex attribute name or a list, not None.")
        if isinstance(types, str):
            # A string names the vertex attribute holding the classes.
            types = graph.vs[types]
        else:
            # Make sure it is a list
            types = list(types)

        if set(types) != {0, 1}:
            # Map arbitrary class indicators onto consecutive integers.
            new_type = _ig.UniqueIdGenerator()
            types = [new_type[t] for t in types]

        # After relabelling, anything other than {0, 1} means the vertices do
        # not fall into exactly two classes (a single class is rejected too).
        if set(types) != {0, 1}:
            raise ValueError(
                "Exactly two vertex types are required for a bipartite "
                "partition.")

        if degree_as_node_size:
            if graph.is_directed():
                raise ValueError(
                    "This method is not suitable for directed graphs " +
                    "when using degree as node sizes.")
            node_sizes = graph.degree()
        else:
            node_sizes = [1] * graph.vcount()

        # Layer 1: all nodes and all links.
        partition_01 = cls(graph,
                           node_sizes=node_sizes,
                           resolution_parameter=resolution_parameter_01,
                           **kwargs)

        # Layer 2: every vertex is kept but no links; only class 0 has size.
        H_0 = graph.subgraph_edges([], delete_vertices=False)
        partition_0 = cls(H_0,
                          weights=None,
                          node_sizes=[
                              s if t == 0 else 0
                              for s, t in zip(node_sizes, types)
                          ],
                          resolution_parameter=resolution_parameter_01 -
                          resolution_parameter_0)

        # Layer 3: every vertex is kept but no links; only class 1 has size.
        H_1 = graph.subgraph_edges([], delete_vertices=False)
        partition_1 = cls(H_1,
                          weights=None,
                          node_sizes=[
                              s if t == 1 else 0
                              for s, t in zip(node_sizes, types)
                          ],
                          resolution_parameter=resolution_parameter_01 -
                          resolution_parameter_1)
        return partition_01, partition_0, partition_1
# Example no. 3
# 0
def find_partition(graph,
                   method,
                   initial_membership=None,
                   weight=None,
                   resolution_parameter=1.0,
                   consider_comms=ALL_NEIGH_COMMS):
    """
    Detect communities using the Louvain algorithm. This function finds the
    optimal partition given the specified method. For the various possible
    methods see the package documentation.

    Keyword arguments:

    graph
      The graph for which to find the optimal partition.

    method
      The type of partition which will be used during optimisation.

    initial_membership=None
      If provided, the optimisation starts from this membership. It should be
      a list holding any unique identifier per community; the identifiers are
      converted to a numeric representation. Since communities can never be
      split, the number of communities in this initial partition provides an
      upper bound.

    weight=None
      If provided, the edge weights to use: either the name of an edge
      attribute or an iterable of weights, which is converted to a list.
      (N.B. note that Significance is not suited for weighted graphs).

    resolution_parameter=1.0
      The resolution parameter, for those methods that use one.

    consider_comms=ALL_NEIGH_COMMS
      Determines which communities are considered when moving a node.

      ALL_COMMS
        Consider all communities always.

      ALL_NEIGH_COMMS
        Consider all communities of the neighbours.

      RAND_COMM
        Consider only a single random community.

      RAND_NEIGH_COMM
        Consider only a single random community of the neighbours. Notice
        that this is sampled from the set of all neighbours so that the
        communities are sampled with respective frequency.

      In ordinary cases it is usually not necessary to alter this parameter.
      The default choice of considering all communities of the neighbours
      works relatively well, and is relatively fast. However, in the case of
      negative weights, it may be better to move a node to a community to
      which it is not connected, so that one would need to consider all
      communities. Alternatively, by only selecting a single random community
      from the neighbours to consider, one can considerably speed up the
      algorithm, without losing too much quality.

    The quality of the partition, as measured by the indicated method, is
    provided in the returned partition as partition.quality.

    returns: optimized partition."""
    capsule = __get_py_capsule(graph)

    # A string names an edge attribute; any other non-None value is
    # materialised as a list.
    if isinstance(weight, str):
        edge_weights = graph.es[weight]
    elif weight is None:
        edge_weights = None
    else:
        edge_weights = list(weight)

    # Arbitrary community identifiers are mapped onto integers.
    if initial_membership is None:
        start_membership = None
    else:
        label_ids = _ig.UniqueIdGenerator()
        start_membership = [label_ids[label] for label in initial_membership]

    membership, quality = _c_louvain._find_partition(
        capsule, method, start_membership, edge_weights,
        resolution_parameter, consider_comms)

    clustering = _ig.VertexClustering(graph, membership)
    clustering.quality = quality
    return clustering
def igraph_from_pandas(edges_table,
                       vertices_table,
                       source_cl='from',
                       target_cl='to',
                       vertex_attrs=None,
                       vertex_id_cl='v_id',
                       directed=False):
    """igraph_from_pandas(edges_table, vertices_table, source_cl='from', target_cl='to', vertex_attrs=None, vertex_id_cl='v_id', directed=False)

    Builds an igraph graph from a pandas edge table and, optionally, attaches vertex attributes taken from a pandas vertex table. Vertices are created from the values found in the two endpoint columns of the edge table and those values are stored in the "name" vertex attribute (g.vs['name']). Other columns of the edge table are not copied to the graph, and vertices that appear only in the vertex table are not added. (imports pandas as pd and igraph as ig, they should be installed).

    @param edges_table: Pandas dataframe with two columns holding the "from" and "to" nodes of each edge. If the columns are named differently, declare them with source_cl and target_cl.
    @param vertices_table: Pandas dataframe with one row per vertex, holding the vertex identifier (see vertex_id_cl) and the attribute columns. Pass None (or anything that is not a dataframe) when no vertex attributes are needed.
    @param source_cl: Name of column including starting nodes of each edge, defaults to "from".
    @param target_cl: Name of column including ending nodes of each edge, defaults to "to".
    @param vertex_attrs: List of vertices_table column names to be added to the graph as vertex attributes. Names that are not columns of vertices_table are skipped. Required when vertices_table is a dataframe; to use every column pass list(vertices_table.columns).
    @param vertex_id_cl: Name of column in vertices_table which holds the vertex names used to match attributes to vertices. Defaults to "v_id".
    @param directed: bool, should the network be directed? It is passed to igraph.Graph. Defaults to False.
    @return: the igraph graph.
    @raise ValueError: if edges_table is not a dataframe, if source_cl or target_cl is not one of its columns, if vertices_table is a dataframe but vertex_attrs is empty, or if vertex_id_cl is not a column of vertices_table.

    """

    import pandas as pd

    # --- validate the arguments before building anything ---
    if not isinstance(edges_table, pd.DataFrame):
        raise ValueError("edges table is required!")
    # Both endpoint columns have to be present. Each one is tested on its
    # own: ``a and b in cols`` would only test ``b``.
    if (source_cl not in edges_table.columns
            or target_cl not in edges_table.columns):
        raise ValueError('Edges columns missing!')

    use_vertex_table = isinstance(vertices_table, pd.DataFrame)
    if use_vertex_table:
        if not vertex_attrs:
            raise ValueError(
                'No attributes provided. Remove vertices table from arguments and try again.'
            )
        if vertex_id_cl not in vertices_table.columns:
            raise ValueError('Vertex ID column missing!')

    # Imported only once the arguments are known to be usable.
    import igraph as ig

    # --- build the graph from the edge list ---
    # The generator maps every distinct endpoint value to a vertex index.
    id_gen = ig.UniqueIdGenerator()
    endpoints = edges_table[[source_cl, target_cl]]
    edgelist = [(id_gen[start_edge], id_gen[end_edge])
                for start_edge, end_edge in endpoints.itertuples(index=False,
                                                                 name=None)]
    gg = ig.Graph(edgelist, directed=bool(directed))
    vertex_names = id_gen.values()
    gg.vs["name"] = vertex_names

    # --- attach vertex attributes ---
    if use_vertex_table:
        # One row per graph vertex, in vertex-index order; the left merge
        # pulls the attributes in that order and leaves missing values for
        # vertices that are absent from vertices_table.
        vertices_table_ordered = pd.DataFrame(vertex_names,
                                              columns=['unique_id'])
        try:
            vertices_table_ordered = vertices_table_ordered.merge(
                vertices_table,
                left_on='unique_id',
                right_on=vertex_id_cl,
                how='left')
            for attr2use in vertex_attrs:
                if attr2use in vertices_table.columns:
                    gg.vs[attr2use] = vertices_table_ordered[
                        attr2use].values.tolist()
        except KeyError as err:
            raise ValueError('Vertex ID column missing!') from err
    return gg