def renumber_communities(self):
    """ Relabel the communities as consecutive integers 0,...,q-1.

    Here q is the number of communities that actually occur in the current
    membership. Community labels that no node uses are never handed a new
    number, so empty communities disappear as a side effect.
    """
    relabel = ig.UniqueIdGenerator()
    compact_membership = []
    for community_label in self._membership:
        # The generator hands out the next unused integer the first time it
        # sees a label and the same integer on every later lookup.
        compact_membership.append(relabel[community_label])
    self._membership = compact_membership

    # Refresh the internal bookkeeping for the new labels (helper defined
    # elsewhere on this object).
    self.__init_admin()

    # The length of the object is the number of communities.
    self._len = len(self.community)
def Bipartite(cls, graph, resolution_parameter_01,
              resolution_parameter_0=0, resolution_parameter_1=0,
              degree_as_node_size=False, types='type', **kwargs):
    """ Create three layers for bipartite partitions.

    This creates three layers for bipartite partition necessary for detecting
    communities in bipartite networks. These three layers should be passed to
    :func:`Optimiser.optimise_partition_multiplex` with
    ``layer_weights=[1,-1,-1]``.

    Parameters
    ----------
    graph : :class:`ig.Graph`
      Graph to define the bipartite partitions on.

    resolution_parameter_01 : double
      Resolution parameter for in between two classes.

    resolution_parameter_0 : double
      Resolution parameter for class 0.

    resolution_parameter_1 : double
      Resolution parameter for class 1.

    degree_as_node_size : boolean
      If ``True`` use degree as node size instead of 1, to mimic modularity,
      see `Notes <#notes-bipartite>`_.

    types : vertex attribute or list
      Indicator of the class for each vertex. Either the name of a vertex
      attribute or an iterable with one entry per vertex. If the two classes
      are not labelled 0 and 1, they are relabelled automatically in order
      of first appearance.

    **kwargs
      Additional arguments passed on to default constructor of
      :class:`CPMVertexPartition`. They are only used for the first layer.

    Returns
    -------
    tuple
      ``(partition_01, partition_0, partition_1)``: the layer on the full
      graph, followed by the two edgeless layers in which only the vertices
      of class 0 (respectively class 1) have a non-zero node size.

    Raises
    ------
    TypeError
      If ``types`` is ``None``.

    ValueError
      If ``types`` does not contain exactly two distinct classes, or if
      ``degree_as_node_size`` is ``True`` for a directed graph.

    .. _notes-bipartite:

    Notes
    -----

    For bipartite networks, we would like to be able to set three different
    resolution parameters: one for within each class :math:`\\gamma_0,
    \\gamma_1`, and one for the links between classes, :math:`\\gamma_{01}`.
    Then the formulation would be

    .. math::

      Q = \\sum_{ij}
       [A_{ij}
        - (\\gamma_0\\delta(s_i,0) + \\gamma_1\\delta(s_i,1)) \\delta(s_i,s_j)
        - \\gamma_{01}(1 - \\delta(s_i, s_j))
       ]\\delta(\\sigma_i, \\sigma_j)

    In terms of communities this is

    .. math::

      Q = \\sum_c (e_c - \\gamma_{01} 2 n_c(0) n_c(1)
                       - \\gamma_0 n^2_c(0)
                       - \\gamma_1 n^2_c(1))

    where :math:`n_c(0)` is the number of nodes in community :math:`c` of
    class 0 (and similarly for 1) and :math:`e_c` is the number of edges
    within community :math:`c`. We denote by :math:`n_c = n_c(0) + n_c(1)`
    the total number of nodes in community :math:`c`.

    We achieve this by creating three layers : (1) all nodes have
    ``node_size = 1`` and all relevant links; (2) only nodes of class 0 have
    ``node_size = 1`` and no links; (3) only nodes of class 1 have
    ``node_size = 1`` and no links. If we add the first with resolution
    parameter :math:`\\gamma_{01}`, and the others with resolution parameters
    :math:`\\gamma_{01} - \\gamma_0` and :math:`\\gamma_{01} - \\gamma_1`, but
    the latter two with a layer weight of -1 while the first layer has layer
    weight 1, we obtain the following:

    .. math::

      Q &=  \\sum_c (e_c - \\gamma_{01} n_c^2)
           -\\sum_c (- (\\gamma_{01} - \\gamma_0) n_c(0)^2)
           -\\sum_c (- (\\gamma_{01} - \\gamma_1) n_c(1)^2) \\\\
        &=  \\sum_c [e_c - \\gamma_{01} 2 n_c(0) n_c(1)
                         - \\gamma_{01} n_c(0)^2
                         - \\gamma_{01} n_c(1)^2
                         + ( \\gamma_{01} - \\gamma_0) n_c(0)^2
                         + ( \\gamma_{01} - \\gamma_1) n_c(1)^2
                   ] \\\\
        &=  \\sum_c [e_c - \\gamma_{01} 2 n_c(0) n_c(1)
                         - \\gamma_{0} n_c(0)^2
                         - \\gamma_{1} n_c(1)^2]

    Although the derivation above is using :math:`n_c^2`, implicitly assuming
    a directed graph with self-loops, similar derivations can be made for
    undirected graphs using :math:`\\binom{n_c}{2}`, but the notation is then
    somewhat more convoluted.

    If we set node sizes equal to the degree, we get something similar to
    modularity, except that the resolution parameter should still be divided
    by :math:`2m`. In particular, in general (i.e. not specifically for
    bipartite graph) if ``node_sizes=G.degree()`` we then obtain

    .. math::

      Q = \\sum_{ij} A_{ij} - \\gamma k_i k_j

    In the case of bipartite graphs something similar is obtained, but then
    correctly adapted (as long as the resolution parameter is also
    appropriately rescaled).

    .. note:: This function is not suited for directed graphs in the case of
              using the degree as node sizes.
    """
    if types is None:
        # Without class indicators the layers cannot be built; fail here with
        # a clear message instead of an opaque "NoneType is not iterable".
        raise TypeError(
            "types must be a vertex attribute name or an iterable of class "
            "indicators, not None.")

    if isinstance(types, str):
        types = graph.vs[types]
    else:
        # Make sure it is a list
        types = list(types)

    if set(types) != {0, 1}:
        # Relabel arbitrary class indicators as 0, 1, ... in order of first
        # appearance.
        relabel = {}
        types = [relabel.setdefault(t, len(relabel)) for t in types]
        if set(types) != {0, 1}:
            raise ValueError(
                "Exactly two vertex types are required, found %d."
                % len(set(types)))

    if degree_as_node_size:
        if graph.is_directed():
            raise ValueError(
                "This method is not suitable for directed graphs " +
                "when using degree as node sizes.")
        node_sizes = graph.degree()
    else:
        node_sizes = [1] * graph.vcount()

    # Layer 1: the full graph with every node counted.
    partition_01 = cls(graph,
                       node_sizes=node_sizes,
                       resolution_parameter=resolution_parameter_01,
                       **kwargs)

    # Layer 2: no edges, only nodes of class 0 keep their size.
    H_0 = graph.subgraph_edges([], delete_vertices=False)
    partition_0 = cls(
        H_0, weights=None,
        node_sizes=[s if t == 0 else 0 for s, t in zip(node_sizes, types)],
        resolution_parameter=resolution_parameter_01 - resolution_parameter_0)

    # Layer 3: no edges, only nodes of class 1 keep their size.
    H_1 = graph.subgraph_edges([], delete_vertices=False)
    partition_1 = cls(
        H_1, weights=None,
        node_sizes=[s if t == 1 else 0 for s, t in zip(node_sizes, types)],
        resolution_parameter=resolution_parameter_01 - resolution_parameter_1)

    return partition_01, partition_0, partition_1
def find_partition(graph, method, initial_membership=None, weight=None,
                   resolution_parameter=1.0, consider_comms=ALL_NEIGH_COMMS):
    """ Detect communities in a graph using the Louvain algorithm.

    The partition is optimised with respect to the given method. See the
    package documentation for the methods that are available.

    Keyword arguments:

    graph
      The graph for which to find the optimal partition.

    method
      The type of partition which will be used during optimisation.

    initial_membership=None
      Optional starting membership, one entry per vertex. Any unique
      identifier may be used for a community; the identifiers are converted
      to a numeric representation. Since communities can never be split, the
      number of communities in this initial partition is an upper bound.

    weight=None
      Optional edge weights: either the name of an edge attribute, or an
      iterable of weights (which is converted to a list). Note that
      Significance is not suited for weighted graphs.

    resolution_parameter=1.0
      Resolution parameter, for those methods that use one.

    consider_comms=ALL_NEIGH_COMMS
      Which communities to consider when moving a node:

      ALL_COMMS
        Always consider all communities.

      ALL_NEIGH_COMMS
        Consider all communities of the neighbours.

      RAND_COMM
        Consider only a single random community.

      random neighbour community
        Consider only a single random community of the neighbours. It is
        sampled from the set of all neighbours, so communities are sampled
        with their respective frequency. (The original documentation lists
        this option as ALL_NEIGH_COMMS a second time; presumably the
        constant is RAND_NEIGH_COMM - verify against the module constants.)

    Altering consider_comms is usually not necessary. Considering all
    communities of the neighbours works relatively well and is relatively
    fast. With negative weights, however, it may be better to move a node to
    a community to which it is not connected, which requires considering all
    communities. Conversely, considering only a single random community of
    the neighbours can considerably speed up the algorithm without losing
    too much quality.

    returns: the optimised partition. Its quality, as measured by the
    indicated method, is available as partition.quality.
    """
    capsule = __get_py_capsule(graph)

    if weight is not None:
        # A string names an edge attribute; anything else is materialised as
        # a list.
        weight = graph.es[weight] if isinstance(weight, str) else list(weight)

    if initial_membership is not None:
        label_ids = _ig.UniqueIdGenerator()
        numeric_membership = []
        for label in initial_membership:
            numeric_membership.append(label_ids[label])
        initial_membership = numeric_membership

    result = _c_louvain._find_partition(
        capsule, method, initial_membership, weight,
        resolution_parameter, consider_comms)
    membership, quality = result

    clustering = _ig.VertexClustering(graph, membership)
    clustering.quality = quality
    return clustering
def igraph_from_pandas(edges_table, vertices_table, source_cl='from',
                       target_cl='to', vertex_attrs=None,
                       vertex_id_cl='v_id', directed=False):
    """igraph_from_pandas(edges_table, vertices_table, source_cl='from',
    target_cl='to', vertex_attrs=None, vertex_id_cl='v_id', directed=False)

    Takes two pandas dataframes, vertices and edges tables, and builds an
    igraph graph with the requested vertex attributes attached.
    (imports pandas as pd and igraph as ig, they should be installed).

    Vertices are created in order of first appearance in the edges table and
    their original identifiers are stored in the "name" vertex attribute.

    @param edges_table: Pandas dataframe with two columns as "from" and "to"
        nodes of each edge. If the names of the columns are different, they
        should be declared using the following parameters.
    @param vertices_table: Pandas dataframe with one row per vertex, holding
        the vertex identifiers and the attribute columns. Any value that is
        not a pandas dataframe (e.g. None) means no vertex attributes are
        added.
    @param source_cl: Name of column including starting nodes of each edge,
        defaults to "from".
    @param target_cl: Name of column including ending nodes of each edge,
        defaults to "to".
    @param vertex_attrs: List of pandas column names to be used as node
        attributes to be added to graph. Names that are not columns of
        vertices_table are skipped. Required when vertices_table is given.
        If you want all vertices table columns to be used, pass
        list(vertices_table.columns).
    @param vertex_id_cl: Name of column in vertices_table which includes
        vertices names to be used while adding attributes. Defaults to
        "v_id".
    @param directed: bool, should the network be directed? It is passed to
        igraph.Graph. Defaults to False.
    @return: the igraph graph that was built.
    @raise ValueError: if edges_table is not a pandas dataframe, if the
        source or target column is missing from it, if vertices_table is
        given without vertex_attrs, or if vertex_id_cl is not a column of
        vertices_table.
    """
    import pandas as pd

    # Validate every argument up front so that a bad call fails with a
    # precise message before any graph is built.
    if not isinstance(edges_table, pd.DataFrame):
        raise ValueError("edges table is required!")
    # Both columns must be present. (Testing
    # "source_cl and target_cl in columns" would only check the target.)
    if (source_cl not in edges_table.columns
            or target_cl not in edges_table.columns):
        raise ValueError('Edges columns missing!')

    has_vertices = isinstance(vertices_table, pd.DataFrame)
    if has_vertices:
        if not vertex_attrs:
            raise ValueError(
                'No attributes provided. Remove vertices table from arguments and try again.'
            )
        if vertex_id_cl not in vertices_table.columns:
            raise ValueError('Vertex ID column missing!')

    # igraph is imported only after the inputs are validated.
    import igraph as ig

    # Map arbitrary vertex identifiers to consecutive integer ids in order of
    # first appearance.
    id_gen = ig.UniqueIdGenerator()
    edge_rows = edges_table[[source_cl, target_cl]].itertuples(
        index=False, name=None)
    edgelist = [(id_gen[start_edge], id_gen[end_edge])
                for start_edge, end_edge in edge_rows]

    gg = ig.Graph(edgelist, directed=bool(directed))
    vertex_names = id_gen.values()
    gg.vs["name"] = vertex_names

    if has_vertices:
        try:
            # Reorder the vertices table so its rows follow the vertex order
            # of the graph; vertices without a row get missing values.
            vertices_table_ordered = pd.DataFrame(
                vertex_names, columns=['unique_id']
            ).merge(vertices_table, left_on='unique_id',
                    right_on=vertex_id_cl, how='left')
            for attr2use in vertex_attrs:
                if attr2use in vertices_table.columns:
                    gg.vs[attr2use] = vertices_table_ordered[
                        attr2use].values.tolist()
        except KeyError as err:
            raise ValueError('Vertex ID column missing!') from err

    return gg